Magellan Linux

Contents of /trunk/kernel-lts/patches-3.10/0116-3.10.17-all-fixes.patch



Revision 2395
Mon Feb 3 12:41:31 2014 UTC by niro
File size: 119145 bytes
-copied
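
Usage note (a minimal sketch, not part of the patch itself): the first hunk bumps SUBLEVEL from 16 to 17, so the patch is meant to be applied from the top of a pristine 3.10.16 tree with -p1 (the diff uses a/ and b/ path prefixes); the directory and file names below are illustrative.

    cd linux-3.10.16                                        # pristine 3.10.16 source tree (illustrative path)
    patch -p1 --dry-run < 0116-3.10.17-all-fixes.patch      # check that all hunks apply cleanly
    patch -p1 < 0116-3.10.17-all-fixes.patch                # apply; Makefile now reports 3.10.17
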
1 diff --git a/Makefile b/Makefile
2 index e9528d2..5c7d3d6 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 3
7 PATCHLEVEL = 10
8 -SUBLEVEL = 16
9 +SUBLEVEL = 17
10 EXTRAVERSION =
11 NAME = TOSSUG Baby Fish
12
13 diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
14 index 442ce5d..43de302 100644
15 --- a/arch/arc/include/asm/delay.h
16 +++ b/arch/arc/include/asm/delay.h
17 @@ -53,11 +53,10 @@ static inline void __udelay(unsigned long usecs)
18 {
19 unsigned long loops;
20
21 - /* (long long) cast ensures 64 bit MPY - real or emulated
22 + /* (u64) cast ensures 64 bit MPY - real or emulated
23 * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops
24 */
25 - loops = ((long long)(usecs * 4295 * HZ) *
26 - (long long)(loops_per_jiffy)) >> 32;
27 + loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
28
29 __delay(loops);
30 }
31 diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h
32 index 6fc1159..764f1e3 100644
33 --- a/arch/arc/include/asm/sections.h
34 +++ b/arch/arc/include/asm/sections.h
35 @@ -11,7 +11,6 @@
36
37 #include <asm-generic/sections.h>
38
39 -extern char _int_vec_base_lds[];
40 extern char __arc_dccm_base[];
41 extern char __dtb_start[];
42
43 diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
44 index f158197..b6a8c2d 100644
45 --- a/arch/arc/include/asm/spinlock.h
46 +++ b/arch/arc/include/asm/spinlock.h
47 @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
48
49 static inline void arch_spin_unlock(arch_spinlock_t *lock)
50 {
51 - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
52 + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
53 +
54 + __asm__ __volatile__(
55 + " ex %0, [%1] \n"
56 + : "+r" (tmp)
57 + : "r"(&(lock->slock))
58 + : "memory");
59 +
60 smp_mb();
61 }
62
63 diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
64 index 3242082..30c9baf 100644
65 --- a/arch/arc/include/asm/uaccess.h
66 +++ b/arch/arc/include/asm/uaccess.h
67 @@ -43,7 +43,7 @@
68 * Because it essentially checks if buffer end is within limit and @len is
69 * non-ngeative, which implies that buffer start will be within limit too.
70 *
71 - * The reason for rewriting being, for majorit yof cases, @len is generally
72 + * The reason for rewriting being, for majority of cases, @len is generally
73 * compile time constant, causing first sub-expression to be compile time
74 * subsumed.
75 *
76 @@ -53,7 +53,7 @@
77 *
78 */
79 #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \
80 - (((addr)+(sz)) <= get_fs()))
81 + ((addr) <= (get_fs() - (sz))))
82 #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \
83 likely(__user_ok((addr), (sz))))
84
85 diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
86 index 006dec3..0f944f0 100644
87 --- a/arch/arc/kernel/head.S
88 +++ b/arch/arc/kernel/head.S
89 @@ -27,11 +27,16 @@ stext:
90 ; Don't clobber r0-r4 yet. It might have bootloader provided info
91 ;-------------------------------------------------------------------
92
93 + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE]
94 +
95 #ifdef CONFIG_SMP
96 ; Only Boot (Master) proceeds. Others wait in platform dependent way
97 ; IDENTITY Reg [ 3 2 1 0 ]
98 ; (cpu-id) ^^^ => Zero for UP ARC700
99 ; => #Core-ID if SMP (Master 0)
100 + ; Note that non-boot CPUs might not land here if halt-on-reset and
101 + ; instead breath life from @first_lines_of_secondary, but we still
102 + ; need to make sure only boot cpu takes this path.
103 GET_CPU_ID r5
104 cmp r5, 0
105 jnz arc_platform_smp_wait_to_boot
106 @@ -96,6 +101,8 @@ stext:
107
108 first_lines_of_secondary:
109
110 + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE]
111 +
112 ; setup per-cpu idle task as "current" on this CPU
113 ld r0, [@secondary_idle_tsk]
114 SET_CURR_TASK_ON_CPU r0, r1
115 diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
116 index 8115fa5..a199471 100644
117 --- a/arch/arc/kernel/irq.c
118 +++ b/arch/arc/kernel/irq.c
119 @@ -24,7 +24,6 @@
120 * -Needed for each CPU (hence not foldable into init_IRQ)
121 *
122 * what it does ?
123 - * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000
124 * -Disable all IRQs (on CPU side)
125 * -Optionally, setup the High priority Interrupts as Level 2 IRQs
126 */
127 @@ -32,8 +31,6 @@ void __cpuinit arc_init_IRQ(void)
128 {
129 int level_mask = 0;
130
131 - write_aux_reg(AUX_INTR_VEC_BASE, _int_vec_base_lds);
132 -
133 /* Disable all IRQs: enable them as devices request */
134 write_aux_reg(AUX_IENABLE, 0);
135
136 diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
137 index c6a81c5..0851604 100644
138 --- a/arch/arc/kernel/ptrace.c
139 +++ b/arch/arc/kernel/ptrace.c
140 @@ -92,7 +92,7 @@ static int genregs_set(struct task_struct *target,
141 REG_IN_CHUNK(scratch, callee, ptregs); /* pt_regs[bta..orig_r8] */
142 REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */
143 REG_IGNORE_ONE(efa); /* efa update invalid */
144 - REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */
145 + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */
146
147 return ret;
148 }
149 diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
150 index b2b3731..2d7786b 100644
151 --- a/arch/arc/kernel/setup.c
152 +++ b/arch/arc/kernel/setup.c
153 @@ -47,10 +47,7 @@ void __cpuinit read_arc_build_cfg_regs(void)
154 READ_BCR(AUX_IDENTITY, cpu->core);
155
156 cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR);
157 -
158 cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
159 - if (cpu->vec_base == 0)
160 - cpu->vec_base = (unsigned int)_int_vec_base_lds;
161
162 READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
163 cpu->uncached_base = uncached_space.start << 24;
164 diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
165 index ee6ef2f..7e95e1a 100644
166 --- a/arch/arc/kernel/signal.c
167 +++ b/arch/arc/kernel/signal.c
168 @@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
169 {
170 struct rt_sigframe __user *sf;
171 unsigned int magic;
172 - int err;
173 struct pt_regs *regs = current_pt_regs();
174
175 /* Always make any pending restarted system calls return -EINTR */
176 @@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn)
177 if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
178 goto badframe;
179
180 - err = restore_usr_regs(regs, sf);
181 - err |= __get_user(magic, &sf->sigret_magic);
182 - if (err)
183 + if (__get_user(magic, &sf->sigret_magic))
184 goto badframe;
185
186 if (unlikely(is_do_ss_needed(magic)))
187 if (restore_altstack(&sf->uc.uc_stack))
188 goto badframe;
189
190 + if (restore_usr_regs(regs, sf))
191 + goto badframe;
192 +
193 /* Don't restart from sigreturn */
194 syscall_wont_restart(regs);
195
196 @@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
197 return 1;
198
199 /*
200 + * w/o SA_SIGINFO, struct ucontext is partially populated (only
201 + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
202 + * during signal handler execution. This works for SA_SIGINFO as well
203 + * although the semantics are now overloaded (the same reg state can be
204 + * inspected by userland: but are they allowed to fiddle with it ?
205 + */
206 + err |= stash_usr_regs(sf, regs, set);
207 +
208 + /*
209 * SA_SIGINFO requires 3 args to signal handler:
210 * #1: sig-no (common to any handler)
211 * #2: struct siginfo
212 @@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
213 magic = MAGIC_SIGALTSTK;
214 }
215
216 - /*
217 - * w/o SA_SIGINFO, struct ucontext is partially populated (only
218 - * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
219 - * during signal handler execution. This works for SA_SIGINFO as well
220 - * although the semantics are now overloaded (the same reg state can be
221 - * inspected by userland: but are they allowed to fiddle with it ?
222 - */
223 - err |= stash_usr_regs(sf, regs, set);
224 err |= __put_user(magic, &sf->sigret_magic);
225 if (err)
226 return err;
227 diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
228 index 4cd8163..116d3e0 100644
229 --- a/arch/arc/kernel/unaligned.c
230 +++ b/arch/arc/kernel/unaligned.c
231 @@ -233,6 +233,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
232 regs->status32 &= ~STATUS_DE_MASK;
233 } else {
234 regs->ret += state.instr_len;
235 +
236 + /* handle zero-overhead-loop */
237 + if ((regs->ret == regs->lp_end) && (regs->lp_count)) {
238 + regs->ret = regs->lp_start;
239 + regs->lp_count--;
240 + }
241 }
242
243 return 0;
244 diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
245 index bfc198c..863c892 100644
246 --- a/arch/arm/include/asm/jump_label.h
247 +++ b/arch/arm/include/asm/jump_label.h
248 @@ -16,7 +16,7 @@
249
250 static __always_inline bool arch_static_branch(struct static_key *key)
251 {
252 - asm goto("1:\n\t"
253 + asm_volatile_goto("1:\n\t"
254 JUMP_LABEL_NOP "\n\t"
255 ".pushsection __jump_table, \"aw\"\n\t"
256 ".word 1b, %l[l_yes], %c0\n\t"
257 diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
258 index 5bc2615..ab1fe3b 100644
259 --- a/arch/arm/kernel/process.c
260 +++ b/arch/arm/kernel/process.c
261 @@ -466,7 +466,7 @@ int in_gate_area_no_mm(unsigned long addr)
262 {
263 return in_gate_area(NULL, addr);
264 }
265 -#define is_gate_vma(vma) ((vma) = &gate_vma)
266 +#define is_gate_vma(vma) ((vma) == &gate_vma)
267 #else
268 #define is_gate_vma(vma) 0
269 #endif
270 diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
271 index 4d6d77e..e194f95 100644
272 --- a/arch/mips/include/asm/jump_label.h
273 +++ b/arch/mips/include/asm/jump_label.h
274 @@ -22,7 +22,7 @@
275
276 static __always_inline bool arch_static_branch(struct static_key *key)
277 {
278 - asm goto("1:\tnop\n\t"
279 + asm_volatile_goto("1:\tnop\n\t"
280 "nop\n\t"
281 ".pushsection __jump_table, \"aw\"\n\t"
282 WORD_INSN " 1b, %l[l_yes], %0\n\t"
283 diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
284 index 04e47c6..b3f87a3 100644
285 --- a/arch/parisc/kernel/traps.c
286 +++ b/arch/parisc/kernel/traps.c
287 @@ -805,14 +805,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
288 else {
289
290 /*
291 - * The kernel should never fault on its own address space.
292 + * The kernel should never fault on its own address space,
293 + * unless pagefault_disable() was called before.
294 */
295
296 - if (fault_space == 0)
297 + if (fault_space == 0 && !in_atomic())
298 {
299 pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
300 parisc_terminate("Kernel Fault", regs, code, fault_address);
301 -
302 }
303 }
304
305 diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
306 index ae098c4..f016bb6 100644
307 --- a/arch/powerpc/include/asm/jump_label.h
308 +++ b/arch/powerpc/include/asm/jump_label.h
309 @@ -19,7 +19,7 @@
310
311 static __always_inline bool arch_static_branch(struct static_key *key)
312 {
313 - asm goto("1:\n\t"
314 + asm_volatile_goto("1:\n\t"
315 "nop\n\t"
316 ".pushsection __jump_table, \"aw\"\n\t"
317 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
318 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
319 index b02f91e..7bcd4d6 100644
320 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
321 +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
322 @@ -1054,7 +1054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
323 BEGIN_FTR_SECTION
324 mfspr r8, SPRN_DSCR
325 ld r7, HSTATE_DSCR(r13)
326 - std r8, VCPU_DSCR(r7)
327 + std r8, VCPU_DSCR(r9)
328 mtspr SPRN_DSCR, r7
329 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
330
331 diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
332 index 6c32190..346b1c8 100644
333 --- a/arch/s390/include/asm/jump_label.h
334 +++ b/arch/s390/include/asm/jump_label.h
335 @@ -15,7 +15,7 @@
336
337 static __always_inline bool arch_static_branch(struct static_key *key)
338 {
339 - asm goto("0: brcl 0,0\n"
340 + asm_volatile_goto("0: brcl 0,0\n"
341 ".pushsection __jump_table, \"aw\"\n"
342 ASM_ALIGN "\n"
343 ASM_PTR " 0b, %l[label], %0\n"
344 diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
345 index 5080d16..ec2e2e2 100644
346 --- a/arch/sparc/include/asm/jump_label.h
347 +++ b/arch/sparc/include/asm/jump_label.h
348 @@ -9,7 +9,7 @@
349
350 static __always_inline bool arch_static_branch(struct static_key *key)
351 {
352 - asm goto("1:\n\t"
353 + asm_volatile_goto("1:\n\t"
354 "nop\n\t"
355 "nop\n\t"
356 ".pushsection __jump_table, \"aw\"\n\t"
357 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
358 index e99ac27..4af181d 100644
359 --- a/arch/x86/include/asm/cpufeature.h
360 +++ b/arch/x86/include/asm/cpufeature.h
361 @@ -365,7 +365,7 @@ extern const char * const x86_power_flags[32];
362 static __always_inline __pure bool __static_cpu_has(u16 bit)
363 {
364 #if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
365 - asm goto("1: jmp %l[t_no]\n"
366 + asm_volatile_goto("1: jmp %l[t_no]\n"
367 "2:\n"
368 ".section .altinstructions,\"a\"\n"
369 " .long 1b - .\n"
370 diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
371 index cccd07f..779c2ef 100644
372 --- a/arch/x86/include/asm/e820.h
373 +++ b/arch/x86/include/asm/e820.h
374 @@ -29,7 +29,7 @@ extern void e820_setup_gap(void);
375 extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
376 unsigned long start_addr, unsigned long long end_addr);
377 struct setup_data;
378 -extern void parse_e820_ext(struct setup_data *data);
379 +extern void parse_e820_ext(u64 phys_addr, u32 data_len);
380
381 #if defined(CONFIG_X86_64) || \
382 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
383 diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
384 index 3a16c14..0297669 100644
385 --- a/arch/x86/include/asm/jump_label.h
386 +++ b/arch/x86/include/asm/jump_label.h
387 @@ -13,7 +13,7 @@
388
389 static __always_inline bool arch_static_branch(struct static_key *key)
390 {
391 - asm goto("1:"
392 + asm_volatile_goto("1:"
393 STATIC_KEY_INITIAL_NOP
394 ".pushsection __jump_table, \"aw\" \n\t"
395 _ASM_ALIGN "\n\t"
396 diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
397 index d32abea..174da5f 100644
398 --- a/arch/x86/kernel/e820.c
399 +++ b/arch/x86/kernel/e820.c
400 @@ -658,15 +658,18 @@ __init void e820_setup_gap(void)
401 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
402 * linked list of struct setup_data, which is parsed here.
403 */
404 -void __init parse_e820_ext(struct setup_data *sdata)
405 +void __init parse_e820_ext(u64 phys_addr, u32 data_len)
406 {
407 int entries;
408 struct e820entry *extmap;
409 + struct setup_data *sdata;
410
411 + sdata = early_memremap(phys_addr, data_len);
412 entries = sdata->len / sizeof(struct e820entry);
413 extmap = (struct e820entry *)(sdata->data);
414 __append_e820_map(extmap, entries);
415 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
416 + early_iounmap(sdata, data_len);
417 printk(KERN_INFO "e820: extended physical RAM map:\n");
418 e820_print_map("extended");
419 }
420 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
421 index 56f7fcf..91964c6 100644
422 --- a/arch/x86/kernel/setup.c
423 +++ b/arch/x86/kernel/setup.c
424 @@ -426,25 +426,23 @@ static void __init reserve_initrd(void)
425 static void __init parse_setup_data(void)
426 {
427 struct setup_data *data;
428 - u64 pa_data;
429 + u64 pa_data, pa_next;
430
431 pa_data = boot_params.hdr.setup_data;
432 while (pa_data) {
433 - u32 data_len, map_len;
434 + u32 data_len, map_len, data_type;
435
436 map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
437 (u64)sizeof(struct setup_data));
438 data = early_memremap(pa_data, map_len);
439 data_len = data->len + sizeof(struct setup_data);
440 - if (data_len > map_len) {
441 - early_iounmap(data, map_len);
442 - data = early_memremap(pa_data, data_len);
443 - map_len = data_len;
444 - }
445 + data_type = data->type;
446 + pa_next = data->next;
447 + early_iounmap(data, map_len);
448
449 - switch (data->type) {
450 + switch (data_type) {
451 case SETUP_E820_EXT:
452 - parse_e820_ext(data);
453 + parse_e820_ext(pa_data, data_len);
454 break;
455 case SETUP_DTB:
456 add_dtb(pa_data);
457 @@ -452,8 +450,7 @@ static void __init parse_setup_data(void)
458 default:
459 break;
460 }
461 - pa_data = data->next;
462 - early_iounmap(data, map_len);
463 + pa_data = pa_next;
464 }
465 }
466
467 diff --git a/drivers/char/random.c b/drivers/char/random.c
468 index 35487e8..81eefa1 100644
469 --- a/drivers/char/random.c
470 +++ b/drivers/char/random.c
471 @@ -1462,12 +1462,11 @@ ctl_table random_table[] = {
472
473 static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
474
475 -static int __init random_int_secret_init(void)
476 +int random_int_secret_init(void)
477 {
478 get_random_bytes(random_int_secret, sizeof(random_int_secret));
479 return 0;
480 }
481 -late_initcall(random_int_secret_init);
482
483 /*
484 * Get a random word for internal kernel use only. Similar to urandom but
485 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
486 index 2667d6d..ab95259 100644
487 --- a/drivers/gpu/drm/i915/intel_display.c
488 +++ b/drivers/gpu/drm/i915/intel_display.c
489 @@ -3946,8 +3946,6 @@ static void intel_connector_check_state(struct intel_connector *connector)
490 * consider. */
491 void intel_connector_dpms(struct drm_connector *connector, int mode)
492 {
493 - struct intel_encoder *encoder = intel_attached_encoder(connector);
494 -
495 /* All the simple cases only support two dpms states. */
496 if (mode != DRM_MODE_DPMS_ON)
497 mode = DRM_MODE_DPMS_OFF;
498 @@ -3958,10 +3956,8 @@ void intel_connector_dpms(struct drm_connector *connector, int mode)
499 connector->dpms = mode;
500
501 /* Only need to change hw state when actually enabled */
502 - if (encoder->base.crtc)
503 - intel_encoder_dpms(encoder, mode);
504 - else
505 - WARN_ON(encoder->connectors_active != false);
506 + if (connector->encoder)
507 + intel_encoder_dpms(to_intel_encoder(connector->encoder), mode);
508
509 intel_modeset_check_state(connector->dev);
510 }
511 diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
512 index 2068df1..8b6b0ba 100644
513 --- a/drivers/gpu/drm/radeon/evergreen.c
514 +++ b/drivers/gpu/drm/radeon/evergreen.c
515 @@ -2990,7 +2990,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
516 rdev->config.evergreen.sx_max_export_size = 256;
517 rdev->config.evergreen.sx_max_export_pos_size = 64;
518 rdev->config.evergreen.sx_max_export_smx_size = 192;
519 - rdev->config.evergreen.max_hw_contexts = 8;
520 + rdev->config.evergreen.max_hw_contexts = 4;
521 rdev->config.evergreen.sq_num_cf_insts = 2;
522
523 rdev->config.evergreen.sc_prim_fifo_size = 0x40;
524 diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
525 index 9490972..150e318 100644
526 --- a/drivers/gpu/drm/radeon/evergreend.h
527 +++ b/drivers/gpu/drm/radeon/evergreend.h
528 @@ -1104,7 +1104,7 @@
529 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
530 */
531 # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
532 - /* 0 - SRC_ADDR
533 + /* 0 - DST_ADDR
534 * 1 - GDS
535 */
536 # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
537 @@ -1119,7 +1119,7 @@
538 # define PACKET3_CP_DMA_CP_SYNC (1 << 31)
539 /* COMMAND */
540 # define PACKET3_CP_DMA_DIS_WC (1 << 21)
541 -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
542 +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
543 /* 0 - none
544 * 1 - 8 in 16
545 * 2 - 8 in 32
546 diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
547 index 79df558..2fd2241 100644
548 --- a/drivers/gpu/drm/radeon/r600d.h
549 +++ b/drivers/gpu/drm/radeon/r600d.h
550 @@ -1259,7 +1259,7 @@
551 */
552 # define PACKET3_CP_DMA_CP_SYNC (1 << 31)
553 /* COMMAND */
554 -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
555 +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
556 /* 0 - none
557 * 1 - 8 in 16
558 * 2 - 8 in 32
559 diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
560 index bbed4af..f9ebf2b 100644
561 --- a/drivers/gpu/drm/radeon/radeon_test.c
562 +++ b/drivers/gpu/drm/radeon/radeon_test.c
563 @@ -37,8 +37,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
564 struct radeon_bo **gtt_obj = NULL;
565 struct radeon_fence *fence = NULL;
566 uint64_t gtt_addr, vram_addr;
567 - unsigned i, n, size;
568 - int r, ring;
569 + unsigned n, size;
570 + int i, r, ring;
571
572 switch (flag) {
573 case RADEON_TEST_COPY_DMA:
574 diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
575 index 8c68e67..495f41f 100644
576 --- a/drivers/gpu/drm/radeon/sid.h
577 +++ b/drivers/gpu/drm/radeon/sid.h
578 @@ -928,7 +928,7 @@
579 * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
580 */
581 # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
582 - /* 0 - SRC_ADDR
583 + /* 0 - DST_ADDR
584 * 1 - GDS
585 */
586 # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
587 @@ -943,7 +943,7 @@
588 # define PACKET3_CP_DMA_CP_SYNC (1 << 31)
589 /* COMMAND */
590 # define PACKET3_CP_DMA_DIS_WC (1 << 21)
591 -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
592 +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
593 /* 0 - none
594 * 1 - 8 in 16
595 * 2 - 8 in 32
596 diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
597 index 98814d1..3288f13 100644
598 --- a/drivers/hwmon/applesmc.c
599 +++ b/drivers/hwmon/applesmc.c
600 @@ -230,6 +230,7 @@ static int send_argument(const char *key)
601
602 static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
603 {
604 + u8 status, data = 0;
605 int i;
606
607 if (send_command(cmd) || send_argument(key)) {
608 @@ -237,6 +238,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
609 return -EIO;
610 }
611
612 + /* This has no effect on newer (2012) SMCs */
613 if (send_byte(len, APPLESMC_DATA_PORT)) {
614 pr_warn("%.4s: read len fail\n", key);
615 return -EIO;
616 @@ -250,6 +252,17 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
617 buffer[i] = inb(APPLESMC_DATA_PORT);
618 }
619
620 + /* Read the data port until bit0 is cleared */
621 + for (i = 0; i < 16; i++) {
622 + udelay(APPLESMC_MIN_WAIT);
623 + status = inb(APPLESMC_CMD_PORT);
624 + if (!(status & 0x01))
625 + break;
626 + data = inb(APPLESMC_DATA_PORT);
627 + }
628 + if (i)
629 + pr_warn("flushed %d bytes, last value is: %d\n", i, data);
630 +
631 return 0;
632 }
633
634 diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
635 index e02f9e3..b06be8e 100644
636 --- a/drivers/i2c/busses/i2c-omap.c
637 +++ b/drivers/i2c/busses/i2c-omap.c
638 @@ -941,6 +941,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id)
639 /*
640 * ProDB0017052: Clear ARDY bit twice
641 */
642 + if (stat & OMAP_I2C_STAT_ARDY)
643 + omap_i2c_ack_stat(dev, OMAP_I2C_STAT_ARDY);
644 +
645 if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
646 OMAP_I2C_STAT_AL)) {
647 omap_i2c_ack_stat(dev, (OMAP_I2C_STAT_RRDY |
648 diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c
649 index b8a9245..9ad2bd3 100644
650 --- a/drivers/watchdog/ts72xx_wdt.c
651 +++ b/drivers/watchdog/ts72xx_wdt.c
652 @@ -310,7 +310,8 @@ static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd,
653
654 case WDIOC_GETSTATUS:
655 case WDIOC_GETBOOTSTATUS:
656 - return put_user(0, p);
657 + error = put_user(0, p);
658 + break;
659
660 case WDIOC_KEEPALIVE:
661 ts72xx_wdt_kick(wdt);
662 diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
663 index 17f3064..1e2288d 100644
664 --- a/fs/btrfs/inode.c
665 +++ b/fs/btrfs/inode.c
666 @@ -8146,7 +8146,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
667
668
669 /* check for collisions, even if the name isn't there */
670 - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
671 + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
672 new_dentry->d_name.name,
673 new_dentry->d_name.len);
674
675 diff --git a/fs/dcache.c b/fs/dcache.c
676 index f09b908..da89cdf 100644
677 --- a/fs/dcache.c
678 +++ b/fs/dcache.c
679 @@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
680 return memcpy(buffer, temp, sz);
681 }
682
683 +char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
684 +{
685 + char *end = buffer + buflen;
686 + /* these dentries are never renamed, so d_lock is not needed */
687 + if (prepend(&end, &buflen, " (deleted)", 11) ||
688 + prepend_name(&end, &buflen, &dentry->d_name) ||
689 + prepend(&end, &buflen, "/", 1))
690 + end = ERR_PTR(-ENAMETOOLONG);
691 + return end;
692 +}
693 +
694 /*
695 * Write full pathname from the root of the filesystem into the buffer.
696 */
697 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
698 index c081e34..03e9beb 100644
699 --- a/fs/ext4/xattr.c
700 +++ b/fs/ext4/xattr.c
701 @@ -1350,6 +1350,8 @@ retry:
702 s_min_extra_isize) {
703 tried_min_extra_isize++;
704 new_extra_isize = s_min_extra_isize;
705 + kfree(is); is = NULL;
706 + kfree(bs); bs = NULL;
707 goto retry;
708 }
709 error = -1;
710 diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
711 index a3f868a..4e5f332 100644
712 --- a/fs/hugetlbfs/inode.c
713 +++ b/fs/hugetlbfs/inode.c
714 @@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log)
715 return h - hstates;
716 }
717
718 -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
719 -{
720 - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
721 - dentry->d_name.name);
722 -}
723 -
724 static struct dentry_operations anon_ops = {
725 - .d_dname = hugetlb_dname
726 + .d_dname = simple_dname
727 };
728
729 /*
730 diff --git a/fs/statfs.c b/fs/statfs.c
731 index c219e733..083dc0a 100644
732 --- a/fs/statfs.c
733 +++ b/fs/statfs.c
734 @@ -94,7 +94,7 @@ retry:
735
736 int fd_statfs(int fd, struct kstatfs *st)
737 {
738 - struct fd f = fdget(fd);
739 + struct fd f = fdget_raw(fd);
740 int error = -EBADF;
741 if (f.file) {
742 error = vfs_statfs(&f.file->f_path, st);
743 diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
744 index 842de22..ded4299 100644
745 --- a/include/linux/compiler-gcc4.h
746 +++ b/include/linux/compiler-gcc4.h
747 @@ -65,6 +65,21 @@
748 #define __visible __attribute__((externally_visible))
749 #endif
750
751 +/*
752 + * GCC 'asm goto' miscompiles certain code sequences:
753 + *
754 + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
755 + *
756 + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
757 + * Fixed in GCC 4.8.2 and later versions.
758 + *
759 + * (asm goto is automatically volatile - the naming reflects this.)
760 + */
761 +#if GCC_VERSION <= 40801
762 +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
763 +#else
764 +# define asm_volatile_goto(x...) do { asm goto(x); } while (0)
765 +#endif
766
767 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
768 #if GCC_VERSION >= 40400
769 diff --git a/include/linux/dcache.h b/include/linux/dcache.h
770 index 1a6bb81..9be5ac9 100644
771 --- a/include/linux/dcache.h
772 +++ b/include/linux/dcache.h
773 @@ -332,6 +332,7 @@ extern int d_validate(struct dentry *, struct dentry *);
774 * helper function for dentry_operations.d_dname() members
775 */
776 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
777 +extern char *simple_dname(struct dentry *, char *, int);
778
779 extern char *__d_path(const struct path *, const struct path *, char *, int);
780 extern char *d_absolute_path(const struct path *, char *, int);
781 diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
782 index c4d870b..19c19a5 100644
783 --- a/include/linux/ipc_namespace.h
784 +++ b/include/linux/ipc_namespace.h
785 @@ -22,7 +22,7 @@ struct ipc_ids {
786 int in_use;
787 unsigned short seq;
788 unsigned short seq_max;
789 - struct rw_semaphore rw_mutex;
790 + struct rw_semaphore rwsem;
791 struct idr ipcs_idr;
792 int next_id;
793 };
794 diff --git a/include/linux/random.h b/include/linux/random.h
795 index 3b9377d..6312dd9 100644
796 --- a/include/linux/random.h
797 +++ b/include/linux/random.h
798 @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags);
799 extern void get_random_bytes(void *buf, int nbytes);
800 extern void get_random_bytes_arch(void *buf, int nbytes);
801 void generate_random_uuid(unsigned char uuid_out[16]);
802 +extern int random_int_secret_init(void);
803
804 #ifndef MODULE
805 extern const struct file_operations random_fops, urandom_fops;
806 diff --git a/include/linux/sem.h b/include/linux/sem.h
807 index 53d4265..976ce3a 100644
808 --- a/include/linux/sem.h
809 +++ b/include/linux/sem.h
810 @@ -12,10 +12,12 @@ struct task_struct;
811 struct sem_array {
812 struct kern_ipc_perm ____cacheline_aligned_in_smp
813 sem_perm; /* permissions .. see ipc.h */
814 - time_t sem_otime; /* last semop time */
815 time_t sem_ctime; /* last change time */
816 struct sem *sem_base; /* ptr to first semaphore in array */
817 - struct list_head sem_pending; /* pending operations to be processed */
818 + struct list_head pending_alter; /* pending operations */
819 + /* that alter the array */
820 + struct list_head pending_const; /* pending complex operations */
821 + /* that do not alter semvals */
822 struct list_head list_id; /* undo requests on this array */
823 int sem_nsems; /* no. of semaphores in array */
824 int complex_count; /* pending complex operations */
825 diff --git a/init/main.c b/init/main.c
826 index 9484f4b..e83ac04 100644
827 --- a/init/main.c
828 +++ b/init/main.c
829 @@ -74,6 +74,7 @@
830 #include <linux/ptrace.h>
831 #include <linux/blkdev.h>
832 #include <linux/elevator.h>
833 +#include <linux/random.h>
834
835 #include <asm/io.h>
836 #include <asm/bugs.h>
837 @@ -777,6 +778,7 @@ static void __init do_basic_setup(void)
838 do_ctors();
839 usermodehelper_enable();
840 do_initcalls();
841 + random_int_secret_init();
842 }
843
844 static void __init do_pre_smp_initcalls(void)
845 diff --git a/ipc/msg.c b/ipc/msg.c
846 index f8fbe2c..558aa91 100644
847 --- a/ipc/msg.c
848 +++ b/ipc/msg.c
849 @@ -70,8 +70,6 @@ struct msg_sender {
850
851 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
852
853 -#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
854 -
855 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
856 static int newque(struct ipc_namespace *, struct ipc_params *);
857 #ifdef CONFIG_PROC_FS
858 @@ -141,27 +139,23 @@ void __init msg_init(void)
859 IPC_MSG_IDS, sysvipc_msg_proc_show);
860 }
861
862 -/*
863 - * msg_lock_(check_) routines are called in the paths where the rw_mutex
864 - * is not held.
865 - */
866 -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
867 +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
868 {
869 - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
870 + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);
871
872 if (IS_ERR(ipcp))
873 - return (struct msg_queue *)ipcp;
874 + return ERR_CAST(ipcp);
875
876 return container_of(ipcp, struct msg_queue, q_perm);
877 }
878
879 -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
880 - int id)
881 +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns,
882 + int id)
883 {
884 - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
885 + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id);
886
887 if (IS_ERR(ipcp))
888 - return (struct msg_queue *)ipcp;
889 + return ERR_CAST(ipcp);
890
891 return container_of(ipcp, struct msg_queue, q_perm);
892 }
893 @@ -171,12 +165,21 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
894 ipc_rmid(&msg_ids(ns), &s->q_perm);
895 }
896
897 +static void msg_rcu_free(struct rcu_head *head)
898 +{
899 + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
900 + struct msg_queue *msq = ipc_rcu_to_struct(p);
901 +
902 + security_msg_queue_free(msq);
903 + ipc_rcu_free(head);
904 +}
905 +
906 /**
907 * newque - Create a new msg queue
908 * @ns: namespace
909 * @params: ptr to the structure that contains the key and msgflg
910 *
911 - * Called with msg_ids.rw_mutex held (writer)
912 + * Called with msg_ids.rwsem held (writer)
913 */
914 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
915 {
916 @@ -195,17 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
917 msq->q_perm.security = NULL;
918 retval = security_msg_queue_alloc(msq);
919 if (retval) {
920 - ipc_rcu_putref(msq);
921 + ipc_rcu_putref(msq, ipc_rcu_free);
922 return retval;
923 }
924
925 - /*
926 - * ipc_addid() locks msq
927 - */
928 + /* ipc_addid() locks msq upon success. */
929 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
930 if (id < 0) {
931 - security_msg_queue_free(msq);
932 - ipc_rcu_putref(msq);
933 + ipc_rcu_putref(msq, msg_rcu_free);
934 return id;
935 }
936
937 @@ -218,7 +218,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
938 INIT_LIST_HEAD(&msq->q_receivers);
939 INIT_LIST_HEAD(&msq->q_senders);
940
941 - msg_unlock(msq);
942 + ipc_unlock_object(&msq->q_perm);
943 + rcu_read_unlock();
944
945 return msq->q_perm.id;
946 }
947 @@ -264,8 +265,8 @@ static void expunge_all(struct msg_queue *msq, int res)
948 * removes the message queue from message queue ID IDR, and cleans up all the
949 * messages associated with this queue.
950 *
951 - * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
952 - * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
953 + * msg_ids.rwsem (writer) and the spinlock for this message queue are held
954 + * before freeque() is called. msg_ids.rwsem remains locked on exit.
955 */
956 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
957 {
958 @@ -275,19 +276,19 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
959 expunge_all(msq, -EIDRM);
960 ss_wakeup(&msq->q_senders, 1);
961 msg_rmid(ns, msq);
962 - msg_unlock(msq);
963 + ipc_unlock_object(&msq->q_perm);
964 + rcu_read_unlock();
965
966 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
967 atomic_dec(&ns->msg_hdrs);
968 free_msg(msg);
969 }
970 atomic_sub(msq->q_cbytes, &ns->msg_bytes);
971 - security_msg_queue_free(msq);
972 - ipc_rcu_putref(msq);
973 + ipc_rcu_putref(msq, msg_rcu_free);
974 }
975
976 /*
977 - * Called with msg_ids.rw_mutex and ipcp locked.
978 + * Called with msg_ids.rwsem and ipcp locked.
979 */
980 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
981 {
982 @@ -391,9 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
983 }
984
985 /*
986 - * This function handles some msgctl commands which require the rw_mutex
987 + * This function handles some msgctl commands which require the rwsem
988 * to be held in write mode.
989 - * NOTE: no locks must be held, the rw_mutex is taken inside this function.
990 + * NOTE: no locks must be held, the rwsem is taken inside this function.
991 */
992 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
993 struct msqid_ds __user *buf, int version)
994 @@ -408,31 +409,39 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
995 return -EFAULT;
996 }
997
998 - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd,
999 - &msqid64.msg_perm, msqid64.msg_qbytes);
1000 - if (IS_ERR(ipcp))
1001 - return PTR_ERR(ipcp);
1002 + down_write(&msg_ids(ns).rwsem);
1003 + rcu_read_lock();
1004 +
1005 + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
1006 + &msqid64.msg_perm, msqid64.msg_qbytes);
1007 + if (IS_ERR(ipcp)) {
1008 + err = PTR_ERR(ipcp);
1009 + goto out_unlock1;
1010 + }
1011
1012 msq = container_of(ipcp, struct msg_queue, q_perm);
1013
1014 err = security_msg_queue_msgctl(msq, cmd);
1015 if (err)
1016 - goto out_unlock;
1017 + goto out_unlock1;
1018
1019 switch (cmd) {
1020 case IPC_RMID:
1021 + ipc_lock_object(&msq->q_perm);
1022 + /* freeque unlocks the ipc object and rcu */
1023 freeque(ns, ipcp);
1024 goto out_up;
1025 case IPC_SET:
1026 if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
1027 !capable(CAP_SYS_RESOURCE)) {
1028 err = -EPERM;
1029 - goto out_unlock;
1030 + goto out_unlock1;
1031 }
1032
1033 + ipc_lock_object(&msq->q_perm);
1034 err = ipc_update_perm(&msqid64.msg_perm, ipcp);
1035 if (err)
1036 - goto out_unlock;
1037 + goto out_unlock0;
1038
1039 msq->q_qbytes = msqid64.msg_qbytes;
1040
1041 @@ -448,25 +457,23 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
1042 break;
1043 default:
1044 err = -EINVAL;
1045 + goto out_unlock1;
1046 }
1047 -out_unlock:
1048 - msg_unlock(msq);
1049 +
1050 +out_unlock0:
1051 + ipc_unlock_object(&msq->q_perm);
1052 +out_unlock1:
1053 + rcu_read_unlock();
1054 out_up:
1055 - up_write(&msg_ids(ns).rw_mutex);
1056 + up_write(&msg_ids(ns).rwsem);
1057 return err;
1058 }
1059
1060 -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1061 +static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
1062 + int cmd, int version, void __user *buf)
1063 {
1064 + int err;
1065 struct msg_queue *msq;
1066 - int err, version;
1067 - struct ipc_namespace *ns;
1068 -
1069 - if (msqid < 0 || cmd < 0)
1070 - return -EINVAL;
1071 -
1072 - version = ipc_parse_version(&cmd);
1073 - ns = current->nsproxy->ipc_ns;
1074
1075 switch (cmd) {
1076 case IPC_INFO:
1077 @@ -477,6 +484,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1078
1079 if (!buf)
1080 return -EFAULT;
1081 +
1082 /*
1083 * We must not return kernel stack data.
1084 * due to padding, it's not enough
1085 @@ -492,7 +500,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1086 msginfo.msgmnb = ns->msg_ctlmnb;
1087 msginfo.msgssz = MSGSSZ;
1088 msginfo.msgseg = MSGSEG;
1089 - down_read(&msg_ids(ns).rw_mutex);
1090 + down_read(&msg_ids(ns).rwsem);
1091 if (cmd == MSG_INFO) {
1092 msginfo.msgpool = msg_ids(ns).in_use;
1093 msginfo.msgmap = atomic_read(&ns->msg_hdrs);
1094 @@ -503,12 +511,13 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1095 msginfo.msgtql = MSGTQL;
1096 }
1097 max_id = ipc_get_maxid(&msg_ids(ns));
1098 - up_read(&msg_ids(ns).rw_mutex);
1099 + up_read(&msg_ids(ns).rwsem);
1100 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
1101 return -EFAULT;
1102 return (max_id < 0) ? 0 : max_id;
1103 }
1104 - case MSG_STAT: /* msqid is an index rather than a msg queue id */
1105 +
1106 + case MSG_STAT:
1107 case IPC_STAT:
1108 {
1109 struct msqid64_ds tbuf;
1110 @@ -517,17 +526,25 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1111 if (!buf)
1112 return -EFAULT;
1113
1114 + memset(&tbuf, 0, sizeof(tbuf));
1115 +
1116 + rcu_read_lock();
1117 if (cmd == MSG_STAT) {
1118 - msq = msg_lock(ns, msqid);
1119 - if (IS_ERR(msq))
1120 - return PTR_ERR(msq);
1121 + msq = msq_obtain_object(ns, msqid);
1122 + if (IS_ERR(msq)) {
1123 + err = PTR_ERR(msq);
1124 + goto out_unlock;
1125 + }
1126 success_return = msq->q_perm.id;
1127 } else {
1128 - msq = msg_lock_check(ns, msqid);
1129 - if (IS_ERR(msq))
1130 - return PTR_ERR(msq);
1131 + msq = msq_obtain_object_check(ns, msqid);
1132 + if (IS_ERR(msq)) {
1133 + err = PTR_ERR(msq);
1134 + goto out_unlock;
1135 + }
1136 success_return = 0;
1137 }
1138 +
1139 err = -EACCES;
1140 if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1141 goto out_unlock;
1142 @@ -536,8 +553,6 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1143 if (err)
1144 goto out_unlock;
1145
1146 - memset(&tbuf, 0, sizeof(tbuf));
1147 -
1148 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
1149 tbuf.msg_stime = msq->q_stime;
1150 tbuf.msg_rtime = msq->q_rtime;
1151 @@ -547,24 +562,48 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1152 tbuf.msg_qbytes = msq->q_qbytes;
1153 tbuf.msg_lspid = msq->q_lspid;
1154 tbuf.msg_lrpid = msq->q_lrpid;
1155 - msg_unlock(msq);
1156 + rcu_read_unlock();
1157 +
1158 if (copy_msqid_to_user(buf, &tbuf, version))
1159 return -EFAULT;
1160 return success_return;
1161 }
1162 - case IPC_SET:
1163 - case IPC_RMID:
1164 - err = msgctl_down(ns, msqid, cmd, buf, version);
1165 - return err;
1166 +
1167 default:
1168 - return -EINVAL;
1169 + return -EINVAL;
1170 }
1171
1172 + return err;
1173 out_unlock:
1174 - msg_unlock(msq);
1175 + rcu_read_unlock();
1176 return err;
1177 }
1178
1179 +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
1180 +{
1181 + int version;
1182 + struct ipc_namespace *ns;
1183 +
1184 + if (msqid < 0 || cmd < 0)
1185 + return -EINVAL;
1186 +
1187 + version = ipc_parse_version(&cmd);
1188 + ns = current->nsproxy->ipc_ns;
1189 +
1190 + switch (cmd) {
1191 + case IPC_INFO:
1192 + case MSG_INFO:
1193 + case MSG_STAT: /* msqid is an index rather than a msg queue id */
1194 + case IPC_STAT:
1195 + return msgctl_nolock(ns, msqid, cmd, version, buf);
1196 + case IPC_SET:
1197 + case IPC_RMID:
1198 + return msgctl_down(ns, msqid, cmd, buf, version);
1199 + default:
1200 + return -EINVAL;
1201 + }
1202 +}
1203 +
1204 static int testmsg(struct msg_msg *msg, long type, int mode)
1205 {
1206 switch(mode)
1207 @@ -640,22 +679,31 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
1208 msg->m_type = mtype;
1209 msg->m_ts = msgsz;
1210
1211 - msq = msg_lock_check(ns, msqid);
1212 + rcu_read_lock();
1213 + msq = msq_obtain_object_check(ns, msqid);
1214 if (IS_ERR(msq)) {
1215 err = PTR_ERR(msq);
1216 - goto out_free;
1217 + goto out_unlock1;
1218 }
1219
1220 + ipc_lock_object(&msq->q_perm);
1221 +
1222 for (;;) {
1223 struct msg_sender s;
1224
1225 err = -EACCES;
1226 if (ipcperms(ns, &msq->q_perm, S_IWUGO))
1227 - goto out_unlock_free;
1228 + goto out_unlock0;
1229 +
1230 + /* raced with RMID? */
1231 + if (msq->q_perm.deleted) {
1232 + err = -EIDRM;
1233 + goto out_unlock0;
1234 + }
1235
1236 err = security_msg_queue_msgsnd(msq, msg, msgflg);
1237 if (err)
1238 - goto out_unlock_free;
1239 + goto out_unlock0;
1240
1241 if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
1242 1 + msq->q_qnum <= msq->q_qbytes) {
1243 @@ -665,32 +713,37 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
1244 /* queue full, wait: */
1245 if (msgflg & IPC_NOWAIT) {
1246 err = -EAGAIN;
1247 - goto out_unlock_free;
1248 + goto out_unlock0;
1249 }
1250 +
1251 ss_add(msq, &s);
1252
1253 if (!ipc_rcu_getref(msq)) {
1254 err = -EIDRM;
1255 - goto out_unlock_free;
1256 + goto out_unlock0;
1257 }
1258
1259 - msg_unlock(msq);
1260 + ipc_unlock_object(&msq->q_perm);
1261 + rcu_read_unlock();
1262 schedule();
1263
1264 - ipc_lock_by_ptr(&msq->q_perm);
1265 - ipc_rcu_putref(msq);
1266 + rcu_read_lock();
1267 + ipc_lock_object(&msq->q_perm);
1268 +
1269 + ipc_rcu_putref(msq, ipc_rcu_free);
1270 if (msq->q_perm.deleted) {
1271 err = -EIDRM;
1272 - goto out_unlock_free;
1273 + goto out_unlock0;
1274 }
1275 +
1276 ss_del(&s);
1277
1278 if (signal_pending(current)) {
1279 err = -ERESTARTNOHAND;
1280 - goto out_unlock_free;
1281 + goto out_unlock0;
1282 }
1283 - }
1284
1285 + }
1286 msq->q_lspid = task_tgid_vnr(current);
1287 msq->q_stime = get_seconds();
1288
1289 @@ -706,9 +759,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
1290 err = 0;
1291 msg = NULL;
1292
1293 -out_unlock_free:
1294 - msg_unlock(msq);
1295 -out_free:
1296 +out_unlock0:
1297 + ipc_unlock_object(&msq->q_perm);
1298 +out_unlock1:
1299 + rcu_read_unlock();
1300 if (msg != NULL)
1301 free_msg(msg);
1302 return err;
1303 @@ -817,21 +871,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
1304 return found ?: ERR_PTR(-EAGAIN);
1305 }
1306
1307 -
1308 -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1309 - int msgflg,
1310 +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg,
1311 long (*msg_handler)(void __user *, struct msg_msg *, size_t))
1312 {
1313 - struct msg_queue *msq;
1314 - struct msg_msg *msg;
1315 int mode;
1316 + struct msg_queue *msq;
1317 struct ipc_namespace *ns;
1318 - struct msg_msg *copy = NULL;
1319 + struct msg_msg *msg, *copy = NULL;
1320
1321 ns = current->nsproxy->ipc_ns;
1322
1323 if (msqid < 0 || (long) bufsz < 0)
1324 return -EINVAL;
1325 +
1326 if (msgflg & MSG_COPY) {
1327 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
1328 if (IS_ERR(copy))
1329 @@ -839,8 +891,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1330 }
1331 mode = convert_mode(&msgtyp, msgflg);
1332
1333 - msq = msg_lock_check(ns, msqid);
1334 + rcu_read_lock();
1335 + msq = msq_obtain_object_check(ns, msqid);
1336 if (IS_ERR(msq)) {
1337 + rcu_read_unlock();
1338 free_copy(copy);
1339 return PTR_ERR(msq);
1340 }
1341 @@ -850,10 +904,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1342
1343 msg = ERR_PTR(-EACCES);
1344 if (ipcperms(ns, &msq->q_perm, S_IRUGO))
1345 - goto out_unlock;
1346 + goto out_unlock1;
1347
1348 - msg = find_msg(msq, &msgtyp, mode);
1349 + ipc_lock_object(&msq->q_perm);
1350 +
1351 + /* raced with RMID? */
1352 + if (msq->q_perm.deleted) {
1353 + msg = ERR_PTR(-EIDRM);
1354 + goto out_unlock0;
1355 + }
1356
1357 + msg = find_msg(msq, &msgtyp, mode);
1358 if (!IS_ERR(msg)) {
1359 /*
1360 * Found a suitable message.
1361 @@ -861,7 +922,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1362 */
1363 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
1364 msg = ERR_PTR(-E2BIG);
1365 - goto out_unlock;
1366 + goto out_unlock0;
1367 }
1368 /*
1369 * If we are copying, then do not unlink message and do
1370 @@ -869,8 +930,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1371 */
1372 if (msgflg & MSG_COPY) {
1373 msg = copy_msg(msg, copy);
1374 - goto out_unlock;
1375 + goto out_unlock0;
1376 }
1377 +
1378 list_del(&msg->m_list);
1379 msq->q_qnum--;
1380 msq->q_rtime = get_seconds();
1381 @@ -879,14 +941,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1382 atomic_sub(msg->m_ts, &ns->msg_bytes);
1383 atomic_dec(&ns->msg_hdrs);
1384 ss_wakeup(&msq->q_senders, 0);
1385 - msg_unlock(msq);
1386 - break;
1387 +
1388 + goto out_unlock0;
1389 }
1390 +
1391 /* No message waiting. Wait for a message */
1392 if (msgflg & IPC_NOWAIT) {
1393 msg = ERR_PTR(-ENOMSG);
1394 - goto out_unlock;
1395 + goto out_unlock0;
1396 }
1397 +
1398 list_add_tail(&msr_d.r_list, &msq->q_receivers);
1399 msr_d.r_tsk = current;
1400 msr_d.r_msgtype = msgtyp;
1401 @@ -897,8 +961,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1402 msr_d.r_maxsize = bufsz;
1403 msr_d.r_msg = ERR_PTR(-EAGAIN);
1404 current->state = TASK_INTERRUPTIBLE;
1405 - msg_unlock(msq);
1406
1407 + ipc_unlock_object(&msq->q_perm);
1408 + rcu_read_unlock();
1409 schedule();
1410
1411 /* Lockless receive, part 1:
1412 @@ -909,7 +974,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1413 * Prior to destruction, expunge_all(-EIRDM) changes r_msg.
1414 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
1415 * rcu_read_lock() prevents preemption between reading r_msg
1416 - * and the spin_lock() inside ipc_lock_by_ptr().
1417 + * and acquiring the q_perm.lock in ipc_lock_object().
1418 */
1419 rcu_read_lock();
1420
1421 @@ -928,32 +993,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
1422 * If there is a message or an error then accept it without
1423 * locking.
1424 */
1425 - if (msg != ERR_PTR(-EAGAIN)) {
1426 - rcu_read_unlock();
1427 - break;
1428 - }
1429 + if (msg != ERR_PTR(-EAGAIN))
1430 + goto out_unlock1;
1431
1432 /* Lockless receive, part 3:
1433 * Acquire the queue spinlock.
1434 */
1435 - ipc_lock_by_ptr(&msq->q_perm);
1436 - rcu_read_unlock();
1437 + ipc_lock_object(&msq->q_perm);
1438
1439 /* Lockless receive, part 4:
1440 * Repeat test after acquiring the spinlock.
1441 */
1442 msg = (struct msg_msg*)msr_d.r_msg;
1443 if (msg != ERR_PTR(-EAGAIN))
1444 - goto out_unlock;
1445 + goto out_unlock0;
1446
1447 list_del(&msr_d.r_list);
1448 if (signal_pending(current)) {
1449 msg = ERR_PTR(-ERESTARTNOHAND);
1450 -out_unlock:
1451 - msg_unlock(msq);
1452 - break;
1453 + goto out_unlock0;
1454 }
1455 +
1456 + ipc_unlock_object(&msq->q_perm);
1457 }
1458 +
1459 +out_unlock0:
1460 + ipc_unlock_object(&msq->q_perm);
1461 +out_unlock1:
1462 + rcu_read_unlock();
1463 if (IS_ERR(msg)) {
1464 free_copy(copy);
1465 return PTR_ERR(msg);
1466 diff --git a/ipc/namespace.c b/ipc/namespace.c
1467 index 7ee61bf..aba9a58 100644
1468 --- a/ipc/namespace.c
1469 +++ b/ipc/namespace.c
1470 @@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
1471 int next_id;
1472 int total, in_use;
1473
1474 - down_write(&ids->rw_mutex);
1475 + down_write(&ids->rwsem);
1476
1477 in_use = ids->in_use;
1478
1479 @@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
1480 perm = idr_find(&ids->ipcs_idr, next_id);
1481 if (perm == NULL)
1482 continue;
1483 - ipc_lock_by_ptr(perm);
1484 + rcu_read_lock();
1485 + ipc_lock_object(perm);
1486 free(ns, perm);
1487 total++;
1488 }
1489 - up_write(&ids->rw_mutex);
1490 + up_write(&ids->rwsem);
1491 }
1492
1493 static void free_ipc_ns(struct ipc_namespace *ns)
1494 diff --git a/ipc/sem.c b/ipc/sem.c
1495 index 70480a3..8c4f59b 100644
1496 --- a/ipc/sem.c
1497 +++ b/ipc/sem.c
1498 @@ -95,8 +95,12 @@ struct sem {
1499 int semval; /* current value */
1500 int sempid; /* pid of last operation */
1501 spinlock_t lock; /* spinlock for fine-grained semtimedop */
1502 - struct list_head sem_pending; /* pending single-sop operations */
1503 -};
1504 + struct list_head pending_alter; /* pending single-sop operations */
1505 + /* that alter the semaphore */
1506 + struct list_head pending_const; /* pending single-sop operations */
1507 + /* that do not alter the semaphore*/
1508 + time_t sem_otime; /* candidate for sem_otime */
1509 +} ____cacheline_aligned_in_smp;
1510
1511 /* One queue for each sleeping process in the system. */
1512 struct sem_queue {
1513 @@ -150,12 +154,15 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
1514 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */
1515
1516 /*
1517 - * linked list protection:
1518 + * Locking:
1519 * sem_undo.id_next,
1520 - * sem_array.sem_pending{,last},
1521 - * sem_array.sem_undo: sem_lock() for read/write
1522 + * sem_array.complex_count,
1523 + * sem_array.pending{_alter,_cont},
1524 + * sem_array.sem_undo: global sem_lock() for read/write
1525 * sem_undo.proc_next: only "current" is allowed to read/write that field.
1526 *
1527 + * sem_array.sem_base[i].pending_{const,alter}:
1528 + * global or semaphore sem_lock() for read/write
1529 */
1530
1531 #define sc_semmsl sem_ctls[0]
1532 @@ -189,77 +196,176 @@ void __init sem_init (void)
1533 IPC_SEM_IDS, sysvipc_sem_proc_show);
1534 }
1535
1536 +/**
1537 + * unmerge_queues - unmerge queues, if possible.
1538 + * @sma: semaphore array
1539 + *
1540 + * The function unmerges the wait queues if complex_count is 0.
1541 + * It must be called prior to dropping the global semaphore array lock.
1542 + */
1543 +static void unmerge_queues(struct sem_array *sma)
1544 +{
1545 + struct sem_queue *q, *tq;
1546 +
1547 + /* complex operations still around? */
1548 + if (sma->complex_count)
1549 + return;
1550 + /*
1551 + * We will switch back to simple mode.
1552 + * Move all pending operation back into the per-semaphore
1553 + * queues.
1554 + */
1555 + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1556 + struct sem *curr;
1557 + curr = &sma->sem_base[q->sops[0].sem_num];
1558 +
1559 + list_add_tail(&q->list, &curr->pending_alter);
1560 + }
1561 + INIT_LIST_HEAD(&sma->pending_alter);
1562 +}
1563 +
1564 +/**
1565 + * merge_queues - Merge single semop queues into global queue
1566 + * @sma: semaphore array
1567 + *
1568 + * This function merges all per-semaphore queues into the global queue.
1569 + * It is necessary to achieve FIFO ordering for the pending single-sop
1570 + * operations when a multi-semop operation must sleep.
1571 + * Only the alter operations must be moved, the const operations can stay.
1572 + */
1573 +static void merge_queues(struct sem_array *sma)
1574 +{
1575 + int i;
1576 + for (i = 0; i < sma->sem_nsems; i++) {
1577 + struct sem *sem = sma->sem_base + i;
1578 +
1579 + list_splice_init(&sem->pending_alter, &sma->pending_alter);
1580 + }
1581 +}
1582 +
1583 +static void sem_rcu_free(struct rcu_head *head)
1584 +{
1585 + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
1586 + struct sem_array *sma = ipc_rcu_to_struct(p);
1587 +
1588 + security_sem_free(sma);
1589 + ipc_rcu_free(head);
1590 +}
1591 +
1592 +/*
1593 + * Wait until all currently ongoing simple ops have completed.
1594 + * Caller must own sem_perm.lock.
1595 + * New simple ops cannot start, because simple ops first check
1596 + * that sem_perm.lock is free.
1597 + * that a) sem_perm.lock is free and b) complex_count is 0.
1598 + */
1599 +static void sem_wait_array(struct sem_array *sma)
1600 +{
1601 + int i;
1602 + struct sem *sem;
1603 +
1604 + if (sma->complex_count) {
1605 + /* The thread that increased sma->complex_count waited on
1606 + * all sem->lock locks. Thus we don't need to wait again.
1607 + */
1608 + return;
1609 + }
1610 +
1611 + for (i = 0; i < sma->sem_nsems; i++) {
1612 + sem = sma->sem_base + i;
1613 + spin_unlock_wait(&sem->lock);
1614 + }
1615 +}
1616 +
1617 /*
1618 * If the request contains only one semaphore operation, and there are
1619 * no complex transactions pending, lock only the semaphore involved.
1620 * Otherwise, lock the entire semaphore array, since we either have
1621 * multiple semaphores in our own semops, or we need to look at
1622 * semaphores from other pending complex operations.
1623 - *
1624 - * Carefully guard against sma->complex_count changing between zero
1625 - * and non-zero while we are spinning for the lock. The value of
1626 - * sma->complex_count cannot change while we are holding the lock,
1627 - * so sem_unlock should be fine.
1628 - *
1629 - * The global lock path checks that all the local locks have been released,
1630 - * checking each local lock once. This means that the local lock paths
1631 - * cannot start their critical sections while the global lock is held.
1632 */
1633 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
1634 int nsops)
1635 {
1636 - int locknum;
1637 - again:
1638 - if (nsops == 1 && !sma->complex_count) {
1639 - struct sem *sem = sma->sem_base + sops->sem_num;
1640 + struct sem *sem;
1641
1642 - /* Lock just the semaphore we are interested in. */
1643 - spin_lock(&sem->lock);
1644 + if (nsops != 1) {
1645 + /* Complex operation - acquire a full lock */
1646 + ipc_lock_object(&sma->sem_perm);
1647
1648 - /*
1649 - * If sma->complex_count was set while we were spinning,
1650 - * we may need to look at things we did not lock here.
1651 + /* And wait until all simple ops that are processed
1652 + * right now have dropped their locks.
1653 */
1654 - if (unlikely(sma->complex_count)) {
1655 - spin_unlock(&sem->lock);
1656 - goto lock_array;
1657 - }
1658 + sem_wait_array(sma);
1659 + return -1;
1660 + }
1661 +
1662 + /*
1663 + * Only one semaphore affected - try to optimize locking.
1664 + * The rules are:
1665 + * - optimized locking is possible if no complex operation
1666 + * is either enqueued or processed right now.
1667 + * - The test for enqueued complex ops is simple:
1668 + * sma->complex_count != 0
1669 + * - Testing for complex ops that are processed right now is
1670 + * a bit more difficult. Complex ops acquire the full lock
1671 + * and first wait that the running simple ops have completed.
1672 + * (see above)
1673 + * Thus: If we own a simple lock and the global lock is free
1674 + * and complex_count is now 0, then it will stay 0 and
1675 + * thus just locking sem->lock is sufficient.
1676 + */
1677 + sem = sma->sem_base + sops->sem_num;
1678
1679 + if (sma->complex_count == 0) {
1680 /*
1681 - * Another process is holding the global lock on the
1682 - * sem_array; we cannot enter our critical section,
1683 - * but have to wait for the global lock to be released.
1684 + * It appears that no complex operation is around.
1685 + * Acquire the per-semaphore lock.
1686 */
1687 - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
1688 - spin_unlock(&sem->lock);
1689 - spin_unlock_wait(&sma->sem_perm.lock);
1690 - goto again;
1691 + spin_lock(&sem->lock);
1692 +
1693 + /* Then check that the global lock is free */
1694 + if (!spin_is_locked(&sma->sem_perm.lock)) {
1695 + /* spin_is_locked() is not a memory barrier */
1696 + smp_mb();
1697 +
1698 + /* Now repeat the test of complex_count:
1699 + * It can't change anymore until we drop sem->lock.
1700 + *	 * Thus: if it is now 0, then it will stay 0.
1701 + */
1702 + if (sma->complex_count == 0) {
1703 + /* fast path successful! */
1704 + return sops->sem_num;
1705 + }
1706 }
1707 + spin_unlock(&sem->lock);
1708 + }
1709 +
1710 + /* slow path: acquire the full lock */
1711 + ipc_lock_object(&sma->sem_perm);
1712
1713 - locknum = sops->sem_num;
1714 + if (sma->complex_count == 0) {
1715 + /* False alarm:
1716 + * There is no complex operation, thus we can switch
1717 + * back to the fast path.
1718 + */
1719 + spin_lock(&sem->lock);
1720 + ipc_unlock_object(&sma->sem_perm);
1721 + return sops->sem_num;
1722 } else {
1723 - int i;
1724 - /*
1725 - * Lock the semaphore array, and wait for all of the
1726 - * individual semaphore locks to go away. The code
1727 - * above ensures no new single-lock holders will enter
1728 - * their critical section while the array lock is held.
1729 + /* Not a false alarm, thus complete the sequence for a
1730 + * full lock.
1731 */
1732 - lock_array:
1733 - spin_lock(&sma->sem_perm.lock);
1734 - for (i = 0; i < sma->sem_nsems; i++) {
1735 - struct sem *sem = sma->sem_base + i;
1736 - spin_unlock_wait(&sem->lock);
1737 - }
1738 - locknum = -1;
1739 + sem_wait_array(sma);
1740 + return -1;
1741 }
1742 - return locknum;
1743 }
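To make the double-check above easier to follow, here is a minimal user-space model of the same fast/slow path. C11 atomics stand in for the kernel's spinlocks, spin_is_locked() and smp_mb(); the names (model_lock_t, model_array, model_sem_lock) are invented for this sketch, and sem_wait_array()/RCU are reduced to comments.

#include <stdatomic.h>
#include <stdbool.h>

typedef atomic_bool model_lock_t;

struct model_array {
        model_lock_t global_lock;     /* models sma->sem_perm.lock    */
        atomic_int   complex_count;   /* models sma->complex_count    */
        model_lock_t slot[64];        /* models sma->sem_base[i].lock */
};

static void model_spin_lock(model_lock_t *l)
{
        while (atomic_exchange_explicit(l, true, memory_order_acquire))
                ;       /* spin until we flip it from false to true */
}

static void model_spin_unlock(model_lock_t *l)
{
        atomic_store_explicit(l, false, memory_order_release);
}

static bool model_is_locked(model_lock_t *l)
{
        return atomic_load_explicit(l, memory_order_relaxed);
}

/* Returns the slot index on the fast path, -1 when the global lock is held. */
static int model_sem_lock(struct model_array *a, int idx, int nsops)
{
        if (nsops != 1) {
                model_spin_lock(&a->global_lock);
                /* the kernel also drains in-flight simple ops here
                 * (sem_wait_array()) */
                return -1;
        }

        if (atomic_load(&a->complex_count) == 0) {
                model_spin_lock(&a->slot[idx]);          /* per-semaphore lock */
                if (!model_is_locked(&a->global_lock)) { /* global lock free?  */
                        atomic_thread_fence(memory_order_seq_cst); /* smp_mb() */
                        if (atomic_load(&a->complex_count) == 0)
                                return idx;              /* fast path holds    */
                }
                model_spin_unlock(&a->slot[idx]);
        }

        /* slow path: take the global lock and decide again */
        model_spin_lock(&a->global_lock);
        if (atomic_load(&a->complex_count) == 0) {
                /* false alarm: switch back to the per-semaphore lock */
                model_spin_lock(&a->slot[idx]);
                model_spin_unlock(&a->global_lock);
                return idx;
        }
        /* the kernel calls sem_wait_array() here as well */
        return -1;
}

int main(void)
{
        static struct model_array a;    /* zero-initialized: all locks free */
        int locknum = model_sem_lock(&a, 3, 1);

        /* ... critical section on semaphore 3 ... */
        if (locknum >= 0)
                model_spin_unlock(&a.slot[locknum]);
        else
                model_spin_unlock(&a.global_lock);
        return 0;
}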
1744
1745 static inline void sem_unlock(struct sem_array *sma, int locknum)
1746 {
1747 if (locknum == -1) {
1748 - spin_unlock(&sma->sem_perm.lock);
1749 + unmerge_queues(sma);
1750 + ipc_unlock_object(&sma->sem_perm);
1751 } else {
1752 struct sem *sem = sma->sem_base + locknum;
1753 spin_unlock(&sem->lock);
1754 @@ -267,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
1755 }
1756
1757 /*
1758 - * sem_lock_(check_) routines are called in the paths where the rw_mutex
1759 + * sem_lock_(check_) routines are called in the paths where the rwsem
1760 * is not held.
1761 *
1762 * The caller holds the RCU read lock.
1763 @@ -319,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
1764 static inline void sem_lock_and_putref(struct sem_array *sma)
1765 {
1766 sem_lock(sma, NULL, -1);
1767 - ipc_rcu_putref(sma);
1768 -}
1769 -
1770 -static inline void sem_putref(struct sem_array *sma)
1771 -{
1772 - ipc_rcu_putref(sma);
1773 + ipc_rcu_putref(sma, ipc_rcu_free);
1774 }
1775
1776 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
1777 @@ -337,7 +438,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
1778 * Without the check/retry algorithm a lockless wakeup is possible:
1779 * - queue.status is initialized to -EINTR before blocking.
1780 * - wakeup is performed by
1781 - * * unlinking the queue entry from sma->sem_pending
1782 + * * unlinking the queue entry from the pending list
1783 * * setting queue.status to IN_WAKEUP
1784 * This is the notification for the blocked thread that a
1785 * result value is imminent.
1786 @@ -371,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
1787 * @ns: namespace
1788 * @params: ptr to the structure that contains key, semflg and nsems
1789 *
1790 - * Called with sem_ids.rw_mutex held (as a writer)
1791 + * Called with sem_ids.rwsem held (as a writer)
1792 */
1793
1794 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
1795 @@ -403,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
1796 sma->sem_perm.security = NULL;
1797 retval = security_sem_alloc(sma);
1798 if (retval) {
1799 - ipc_rcu_putref(sma);
1800 + ipc_rcu_putref(sma, ipc_rcu_free);
1801 return retval;
1802 }
1803
1804 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
1805 if (id < 0) {
1806 - security_sem_free(sma);
1807 - ipc_rcu_putref(sma);
1808 + ipc_rcu_putref(sma, sem_rcu_free);
1809 return id;
1810 }
1811 ns->used_sems += nsems;
1812 @@ -418,12 +518,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
1813 sma->sem_base = (struct sem *) &sma[1];
1814
1815 for (i = 0; i < nsems; i++) {
1816 - INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
1817 + INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
1818 + INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
1819 spin_lock_init(&sma->sem_base[i].lock);
1820 }
1821
1822 sma->complex_count = 0;
1823 - INIT_LIST_HEAD(&sma->sem_pending);
1824 + INIT_LIST_HEAD(&sma->pending_alter);
1825 + INIT_LIST_HEAD(&sma->pending_const);
1826 INIT_LIST_HEAD(&sma->list_id);
1827 sma->sem_nsems = nsems;
1828 sma->sem_ctime = get_seconds();
1829 @@ -435,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
1830
1831
1832 /*
1833 - * Called with sem_ids.rw_mutex and ipcp locked.
1834 + * Called with sem_ids.rwsem and ipcp locked.
1835 */
1836 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
1837 {
1838 @@ -446,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
1839 }
1840
1841 /*
1842 - * Called with sem_ids.rw_mutex and ipcp locked.
1843 + * Called with sem_ids.rwsem and ipcp locked.
1844 */
1845 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
1846 struct ipc_params *params)
1847 @@ -482,12 +584,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
1848 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
1849 }
1850
1851 -/*
1852 - * Determine whether a sequence of semaphore operations would succeed
1853 - * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
1854 +/** perform_atomic_semop - Perform (if possible) a semaphore operation
1855 + * @sma: semaphore array
1856 + * @sops: array with operations that should be checked
1857 + * @nsems: number of sops
1857 + * @nsops: number of operations in sops
1859 + * @pid: pid that did the change
1860 + *
1861 + * Returns 0 if the operation was possible.
1862 + * Returns 1 if the operation is impossible, the caller must sleep.
1863 + * Negative values are error codes.
1864 */
1865
1866 -static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
1867 +static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
1868 int nsops, struct sem_undo *un, int pid)
1869 {
1870 int result, sem_op;
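The 0 / 1 / negative convention documented above is what ultimately surfaces through semop(2): 0 means the whole sembuf array was applied atomically, "must sleep" either blocks the caller or, with IPC_NOWAIT, is reported as -EAGAIN, and other negative values become errno. A small illustrative user-space program (the helper-free structure is mine, the calls are the standard SysV API):

#include <errno.h>
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
        /* one semaphore, initial value 0 */
        int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        if (id < 0) { perror("semget"); return 1; }

        /* try to decrement semaphore 0 by 1, but do not sleep */
        struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = IPC_NOWAIT };

        if (semop(id, &op, 1) == -1) {
                if (errno == EAGAIN)
                        printf("operation could not proceed without sleeping\n");
                else
                        perror("semop");
        }
        semctl(id, 0, IPC_RMID);
        return 0;
}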
1871 @@ -609,60 +718,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
1872 * update_queue is O(N^2) when it restarts scanning the whole queue of
1873 * waiting operations. Therefore this function checks if the restart is
1874 * really necessary. It is called after a previously waiting operation
1875 - * was completed.
1876 + * modified the array.
1877 + * Note that wait-for-zero operations are handled without restart.
1878 */
1879 static int check_restart(struct sem_array *sma, struct sem_queue *q)
1880 {
1881 - struct sem *curr;
1882 - struct sem_queue *h;
1883 -
1884 - /* if the operation didn't modify the array, then no restart */
1885 - if (q->alter == 0)
1886 - return 0;
1887 -
1888 - /* pending complex operations are too difficult to analyse */
1889 - if (sma->complex_count)
1890 + /* pending complex alter operations are too difficult to analyse */
1891 + if (!list_empty(&sma->pending_alter))
1892 return 1;
1893
1894 /* we were a sleeping complex operation. Too difficult */
1895 if (q->nsops > 1)
1896 return 1;
1897
1898 - curr = sma->sem_base + q->sops[0].sem_num;
1899 + /* It is impossible that someone waits for the new value:
1900 + * - complex operations always restart.
1901 + *  - wait-for-zero ops are handled separately.
1902 + * - q is a previously sleeping simple operation that
1903 + * altered the array. It must be a decrement, because
1904 + * simple increments never sleep.
1905 + * - If there are older (higher priority) decrements
1906 + * in the queue, then they have observed the original
1907 + * semval value and couldn't proceed. The operation
1908 + *     decremented the value - thus they won't proceed either.
1909 + */
1910 + return 0;
1911 +}
1912
1913 - /* No-one waits on this queue */
1914 - if (list_empty(&curr->sem_pending))
1915 - return 0;
1916 +/**
1917 + * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks
1918 + * @sma: semaphore array.
1919 + * @semnum: semaphore that was modified.
1920 + * @pt: list head for the tasks that must be woken up.
1921 + *
1922 + * wake_const_ops must be called after a semaphore in a semaphore array
1923 + * was set to 0. If complex const operations are pending, wake_const_ops must
1924 + * be called with semnum = -1, as well as with the number of each modified
1925 + * semaphore.
1926 + * The tasks that must be woken up are added to @pt. The return code
1927 + * is stored in q->pid.
1928 + * The function returns 1 if at least one operation was completed successfully.
1929 + */
1930 +static int wake_const_ops(struct sem_array *sma, int semnum,
1931 + struct list_head *pt)
1932 +{
1933 + struct sem_queue *q;
1934 + struct list_head *walk;
1935 + struct list_head *pending_list;
1936 + int semop_completed = 0;
1937
1938 - /* the new semaphore value */
1939 - if (curr->semval) {
1940 - /* It is impossible that someone waits for the new value:
1941 - * - q is a previously sleeping simple operation that
1942 - * altered the array. It must be a decrement, because
1943 - * simple increments never sleep.
1944 - * - The value is not 0, thus wait-for-zero won't proceed.
1945 - * - If there are older (higher priority) decrements
1946 - * in the queue, then they have observed the original
1947 - * semval value and couldn't proceed. The operation
1948 - * decremented to value - thus they won't proceed either.
1949 + if (semnum == -1)
1950 + pending_list = &sma->pending_const;
1951 + else
1952 + pending_list = &sma->sem_base[semnum].pending_const;
1953 +
1954 + walk = pending_list->next;
1955 + while (walk != pending_list) {
1956 + int error;
1957 +
1958 + q = container_of(walk, struct sem_queue, list);
1959 + walk = walk->next;
1960 +
1961 + error = perform_atomic_semop(sma, q->sops, q->nsops,
1962 + q->undo, q->pid);
1963 +
1964 + if (error <= 0) {
1965 + /* operation completed, remove from queue & wakeup */
1966 +
1967 + unlink_queue(sma, q);
1968 +
1969 + wake_up_sem_queue_prepare(pt, q, error);
1970 + if (error == 0)
1971 + semop_completed = 1;
1972 + }
1973 + }
1974 + return semop_completed;
1975 +}
1976 +
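wake_const_ops() serves the "wait-for-zero" side of semop(2): operations with sem_op == 0 sit on the new pending_const lists until the semaphore value reaches zero. A hypothetical user-space illustration of such an operation (the wait_for_zero helper name is invented; the calls are the standard API):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/* Block until semaphore 0 of the set reaches zero; while sleeping, this is
 * exactly the kind of request that sits on a pending_const list. */
static int wait_for_zero(int semid)
{
        struct sembuf op = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
        return semop(semid, &op, 1);
}

int main(void)
{
        int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        if (id < 0) { perror("semget"); return 1; }

        /* a freshly created semaphore starts at 0, so this returns at once;
         * with a non-zero value the caller would sleep until another task
         * brings the value down to 0 and do_smart_wakeup_zero() runs */
        if (wait_for_zero(id) == -1)
                perror("semop");

        semctl(id, 0, IPC_RMID);
        return 0;
}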
1977 +/**
1978 + * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks
1979 + * @sma: semaphore array
1980 + * @sops: operations that were performed
1981 + * @nsops: number of operations
1982 + * @pt: list head of the tasks that must be woken up.
1983 + *
1984 + * do_smart_wakeup_zero() checks all required queues for wait-for-zero
1985 + * operations, based on the actual changes that were performed on the
1986 + * semaphore array.
1987 + * The function returns 1 if at least one operation was completed successfully.
1988 + */
1989 +static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
1990 + int nsops, struct list_head *pt)
1991 +{
1992 + int i;
1993 + int semop_completed = 0;
1994 + int got_zero = 0;
1995 +
1996 + /* first: the per-semaphore queues, if known */
1997 + if (sops) {
1998 + for (i = 0; i < nsops; i++) {
1999 + int num = sops[i].sem_num;
2000 +
2001 + if (sma->sem_base[num].semval == 0) {
2002 + got_zero = 1;
2003 + semop_completed |= wake_const_ops(sma, num, pt);
2004 + }
2005 + }
2006 + } else {
2007 + /*
2008 +		 * No sops means the modified semaphores are not known.
2009 + * Assume all were changed.
2010 */
2011 - BUG_ON(q->sops[0].sem_op >= 0);
2012 - return 0;
2013 + for (i = 0; i < sma->sem_nsems; i++) {
2014 + if (sma->sem_base[i].semval == 0) {
2015 + got_zero = 1;
2016 + semop_completed |= wake_const_ops(sma, i, pt);
2017 + }
2018 + }
2019 }
2020 /*
2021 - * semval is 0. Check if there are wait-for-zero semops.
2022 - * They must be the first entries in the per-semaphore queue
2023 + * If one of the modified semaphores got 0,
2024 + * then check the global queue, too.
2025 */
2026 - h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
2027 - BUG_ON(h->nsops != 1);
2028 - BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
2029 + if (got_zero)
2030 + semop_completed |= wake_const_ops(sma, -1, pt);
2031
2032 - /* Yes, there is a wait-for-zero semop. Restart */
2033 - if (h->sops[0].sem_op == 0)
2034 - return 1;
2035 -
2036 - /* Again - no-one is waiting for the new value. */
2037 - return 0;
2038 + return semop_completed;
2039 }
2040
2041
2042 @@ -678,6 +859,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
2043 * semaphore.
2044 * The tasks that must be woken up are added to @pt. The return code
2045 * is stored in q->pid.
2046 + * The function internally checks if const operations can now succeed.
2047 + *
2048 * The function return 1 if at least one semop was completed successfully.
2049 */
2050 static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
2051 @@ -688,9 +871,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
2052 int semop_completed = 0;
2053
2054 if (semnum == -1)
2055 - pending_list = &sma->sem_pending;
2056 + pending_list = &sma->pending_alter;
2057 else
2058 - pending_list = &sma->sem_base[semnum].sem_pending;
2059 + pending_list = &sma->sem_base[semnum].pending_alter;
2060
2061 again:
2062 walk = pending_list->next;
2063 @@ -702,16 +885,15 @@ again:
2064
2065 /* If we are scanning the single sop, per-semaphore list of
2066 * one semaphore and that semaphore is 0, then it is not
2067 - * necessary to scan the "alter" entries: simple increments
2068 + * necessary to scan further: simple increments
2069 * that affect only one entry succeed immediately and cannot
2070 * be in the per semaphore pending queue, and decrements
2071 * cannot be successful if the value is already 0.
2072 */
2073 - if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
2074 - q->alter)
2075 + if (semnum != -1 && sma->sem_base[semnum].semval == 0)
2076 break;
2077
2078 - error = try_atomic_semop(sma, q->sops, q->nsops,
2079 + error = perform_atomic_semop(sma, q->sops, q->nsops,
2080 q->undo, q->pid);
2081
2082 /* Does q->sleeper still need to sleep? */
2083 @@ -724,6 +906,7 @@ again:
2084 restart = 0;
2085 } else {
2086 semop_completed = 1;
2087 + do_smart_wakeup_zero(sma, q->sops, q->nsops, pt);
2088 restart = check_restart(sma, q);
2089 }
2090
2091 @@ -735,6 +918,24 @@ again:
2092 }
2093
2094 /**
2095 + * set_semotime(sma, sops) - set sem_otime
2096 + * @sma: semaphore array
2097 + * @sops: operations that modified the array, may be NULL
2098 + *
2099 + * sem_otime is replicated to avoid cache line trashing.
2100 + * This function sets one instance to the current time.
2101 + */
2102 +static void set_semotime(struct sem_array *sma, struct sembuf *sops)
2103 +{
2104 + if (sops == NULL) {
2105 + sma->sem_base[0].sem_otime = get_seconds();
2106 + } else {
2107 + sma->sem_base[sops[0].sem_num].sem_otime =
2108 + get_seconds();
2109 + }
2110 +}
2111 +
2112 +/**
2113 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
2114 * @sma: semaphore array
2115 * @sops: operations that were performed
2116 @@ -742,8 +943,8 @@ again:
2117 * @otime: force setting otime
2118 * @pt: list head of the tasks that must be woken up.
2119 *
2120 - * do_smart_update() does the required called to update_queue, based on the
2121 - * actual changes that were performed on the semaphore array.
2122 + * do_smart_update() does the required calls to update_queue and wakeup_zero,
2123 + * based on the actual changes that were performed on the semaphore array.
2124 * Note that the function does not do the actual wake-up: the caller is
2125 * responsible for calling wake_up_sem_queue_do(@pt).
2126 * It is safe to perform this call after dropping all locks.
2127 @@ -752,52 +953,42 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
2128 int otime, struct list_head *pt)
2129 {
2130 int i;
2131 - int progress;
2132 -
2133 - progress = 1;
2134 -retry_global:
2135 - if (sma->complex_count) {
2136 - if (update_queue(sma, -1, pt)) {
2137 - progress = 1;
2138 - otime = 1;
2139 - sops = NULL;
2140 - }
2141 - }
2142 - if (!progress)
2143 - goto done;
2144
2145 - if (!sops) {
2146 - /* No semops; something special is going on. */
2147 - for (i = 0; i < sma->sem_nsems; i++) {
2148 - if (update_queue(sma, i, pt)) {
2149 - otime = 1;
2150 - progress = 1;
2151 - }
2152 - }
2153 - goto done_checkretry;
2154 - }
2155 + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
2156
2157 - /* Check the semaphores that were modified. */
2158 - for (i = 0; i < nsops; i++) {
2159 - if (sops[i].sem_op > 0 ||
2160 - (sops[i].sem_op < 0 &&
2161 - sma->sem_base[sops[i].sem_num].semval == 0))
2162 - if (update_queue(sma, sops[i].sem_num, pt)) {
2163 - otime = 1;
2164 - progress = 1;
2165 + if (!list_empty(&sma->pending_alter)) {
2166 + /* semaphore array uses the global queue - just process it. */
2167 + otime |= update_queue(sma, -1, pt);
2168 + } else {
2169 + if (!sops) {
2170 + /*
2171 + * No sops, thus the modified semaphores are not
2172 + * known. Check all.
2173 + */
2174 + for (i = 0; i < sma->sem_nsems; i++)
2175 + otime |= update_queue(sma, i, pt);
2176 + } else {
2177 + /*
2178 + * Check the semaphores that were increased:
2179 + * - No complex ops, thus all sleeping ops are
2180 + *   decrements.
2181 + * - If we decreased the value, then any sleeping
2182 + *   semaphore ops won't be able to run: If the
2183 + * previous value was too small, then the new
2184 + * value will be too small, too.
2185 + */
2186 + for (i = 0; i < nsops; i++) {
2187 + if (sops[i].sem_op > 0) {
2188 + otime |= update_queue(sma,
2189 + sops[i].sem_num, pt);
2190 + }
2191 }
2192 + }
2193 }
2194 -done_checkretry:
2195 - if (progress) {
2196 - progress = 0;
2197 - goto retry_global;
2198 - }
2199 -done:
2200 if (otime)
2201 - sma->sem_otime = get_seconds();
2202 + set_semotime(sma, sops);
2203 }
2204
2205 -
2206 /* The following counts are associated to each semaphore:
2207 * semncnt number of tasks waiting on semval being nonzero
2208 * semzcnt number of tasks waiting on semval being zero
2209 @@ -813,14 +1004,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
2210 struct sem_queue * q;
2211
2212 semncnt = 0;
2213 - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
2214 + list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) {
2215 struct sembuf * sops = q->sops;
2216 BUG_ON(sops->sem_num != semnum);
2217 if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
2218 semncnt++;
2219 }
2220
2221 - list_for_each_entry(q, &sma->sem_pending, list) {
2222 + list_for_each_entry(q, &sma->pending_alter, list) {
2223 struct sembuf * sops = q->sops;
2224 int nsops = q->nsops;
2225 int i;
2226 @@ -839,14 +1030,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
2227 struct sem_queue * q;
2228
2229 semzcnt = 0;
2230 - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
2231 + list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) {
2232 struct sembuf * sops = q->sops;
2233 BUG_ON(sops->sem_num != semnum);
2234 if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
2235 semzcnt++;
2236 }
2237
2238 - list_for_each_entry(q, &sma->sem_pending, list) {
2239 + list_for_each_entry(q, &sma->pending_const, list) {
2240 struct sembuf * sops = q->sops;
2241 int nsops = q->nsops;
2242 int i;
2243 @@ -859,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
2244 return semzcnt;
2245 }
2246
2247 -/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
2248 - * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
2249 +/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
2250 + * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
2251 * remains locked on exit.
2252 */
2253 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
2254 @@ -872,7 +1063,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
2255 int i;
2256
2257 /* Free the existing undo structures for this semaphore set. */
2258 - assert_spin_locked(&sma->sem_perm.lock);
2259 + ipc_assert_locked_object(&sma->sem_perm);
2260 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
2261 list_del(&un->list_id);
2262 spin_lock(&un->ulp->lock);
2263 @@ -884,13 +1075,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
2264
2265 /* Wake up all pending processes and let them fail with EIDRM. */
2266 INIT_LIST_HEAD(&tasks);
2267 - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
2268 + list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
2269 + unlink_queue(sma, q);
2270 + wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
2271 + }
2272 +
2273 + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
2274 unlink_queue(sma, q);
2275 wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
2276 }
2277 for (i = 0; i < sma->sem_nsems; i++) {
2278 struct sem *sem = sma->sem_base + i;
2279 - list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
2280 + list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
2281 + unlink_queue(sma, q);
2282 + wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
2283 + }
2284 + list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
2285 unlink_queue(sma, q);
2286 wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
2287 }
2288 @@ -903,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
2289
2290 wake_up_sem_queue_do(&tasks);
2291 ns->used_sems -= sma->sem_nsems;
2292 - security_sem_free(sma);
2293 - ipc_rcu_putref(sma);
2294 + ipc_rcu_putref(sma, sem_rcu_free);
2295 }
2296
2297 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
2298 @@ -931,6 +1130,21 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
2299 }
2300 }
2301
2302 +static time_t get_semotime(struct sem_array *sma)
2303 +{
2304 + int i;
2305 + time_t res;
2306 +
2307 + res = sma->sem_base[0].sem_otime;
2308 + for (i = 1; i < sma->sem_nsems; i++) {
2309 + time_t to = sma->sem_base[i].sem_otime;
2310 +
2311 + if (to > res)
2312 + res = to;
2313 + }
2314 + return res;
2315 +}
2316 +
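Because sem_otime is now replicated per semaphore (set_semotime() updates whichever instance is convenient), readers must take the maximum across the copies; that is what get_semotime() does, and it feeds both IPC_STAT in semctl_nolock() just below and /proc/sysvipc/sem. User space is unaffected; for illustration, the usual IPC_STAT call still reports a single sem_otime:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/* On Linux the caller has to define union semun itself */
union semun {
        int val;
        struct semid_ds *buf;
        unsigned short *array;
};

int main(void)
{
        int id = semget(IPC_PRIVATE, 2, IPC_CREAT | 0600);
        struct semid_ds ds;
        union semun arg = { .buf = &ds };

        if (id < 0 || semctl(id, 0, IPC_STAT, arg) == -1) {
                perror("semget/semctl");
                return 1;
        }
        /* sem_otime here is the maximum over the per-semaphore copies */
        printf("last semop: %ld  nsems: %lu\n",
               (long)ds.sem_otime, (unsigned long)ds.sem_nsems);
        semctl(id, 0, IPC_RMID);
        return 0;
}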
2317 static int semctl_nolock(struct ipc_namespace *ns, int semid,
2318 int cmd, int version, void __user *p)
2319 {
2320 @@ -957,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
2321 seminfo.semmnu = SEMMNU;
2322 seminfo.semmap = SEMMAP;
2323 seminfo.semume = SEMUME;
2324 - down_read(&sem_ids(ns).rw_mutex);
2325 + down_read(&sem_ids(ns).rwsem);
2326 if (cmd == SEM_INFO) {
2327 seminfo.semusz = sem_ids(ns).in_use;
2328 seminfo.semaem = ns->used_sems;
2329 @@ -966,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
2330 seminfo.semaem = SEMAEM;
2331 }
2332 max_id = ipc_get_maxid(&sem_ids(ns));
2333 - up_read(&sem_ids(ns).rw_mutex);
2334 + up_read(&sem_ids(ns).rwsem);
2335 if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
2336 return -EFAULT;
2337 return (max_id < 0) ? 0: max_id;
2338 @@ -1004,9 +1218,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
2339 goto out_unlock;
2340
2341 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
2342 - tbuf.sem_otime = sma->sem_otime;
2343 - tbuf.sem_ctime = sma->sem_ctime;
2344 - tbuf.sem_nsems = sma->sem_nsems;
2345 + tbuf.sem_otime = get_semotime(sma);
2346 + tbuf.sem_ctime = sma->sem_ctime;
2347 + tbuf.sem_nsems = sma->sem_nsems;
2348 rcu_read_unlock();
2349 if (copy_semid_to_user(p, &tbuf, version))
2350 return -EFAULT;
2351 @@ -1070,7 +1284,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
2352
2353 curr = &sma->sem_base[semnum];
2354
2355 - assert_spin_locked(&sma->sem_perm.lock);
2356 + ipc_assert_locked_object(&sma->sem_perm);
2357 list_for_each_entry(un, &sma->list_id, list_id)
2358 un->semadj[semnum] = 0;
2359
2360 @@ -1133,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
2361 rcu_read_unlock();
2362 sem_io = ipc_alloc(sizeof(ushort)*nsems);
2363 if(sem_io == NULL) {
2364 - sem_putref(sma);
2365 + ipc_rcu_putref(sma, ipc_rcu_free);
2366 return -ENOMEM;
2367 }
2368
2369 @@ -1169,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
2370 if(nsems > SEMMSL_FAST) {
2371 sem_io = ipc_alloc(sizeof(ushort)*nsems);
2372 if(sem_io == NULL) {
2373 - sem_putref(sma);
2374 + ipc_rcu_putref(sma, ipc_rcu_free);
2375 return -ENOMEM;
2376 }
2377 }
2378
2379 if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
2380 - sem_putref(sma);
2381 + ipc_rcu_putref(sma, ipc_rcu_free);
2382 err = -EFAULT;
2383 goto out_free;
2384 }
2385
2386 for (i = 0; i < nsems; i++) {
2387 if (sem_io[i] > SEMVMX) {
2388 - sem_putref(sma);
2389 + ipc_rcu_putref(sma, ipc_rcu_free);
2390 err = -ERANGE;
2391 goto out_free;
2392 }
2393 @@ -1199,7 +1413,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
2394 for (i = 0; i < nsems; i++)
2395 sma->sem_base[i].semval = sem_io[i];
2396
2397 - assert_spin_locked(&sma->sem_perm.lock);
2398 + ipc_assert_locked_object(&sma->sem_perm);
2399 list_for_each_entry(un, &sma->list_id, list_id) {
2400 for (i = 0; i < nsems; i++)
2401 un->semadj[i] = 0;
2402 @@ -1272,9 +1486,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
2403 }
2404
2405 /*
2406 - * This function handles some semctl commands which require the rw_mutex
2407 + * This function handles some semctl commands which require the rwsem
2408 * to be held in write mode.
2409 - * NOTE: no locks must be held, the rw_mutex is taken inside this function.
2410 + * NOTE: no locks must be held, the rwsem is taken inside this function.
2411 */
2412 static int semctl_down(struct ipc_namespace *ns, int semid,
2413 int cmd, int version, void __user *p)
2414 @@ -1289,42 +1503,46 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
2415 return -EFAULT;
2416 }
2417
2418 + down_write(&sem_ids(ns).rwsem);
2419 + rcu_read_lock();
2420 +
2421 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
2422 &semid64.sem_perm, 0);
2423 - if (IS_ERR(ipcp))
2424 - return PTR_ERR(ipcp);
2425 + if (IS_ERR(ipcp)) {
2426 + err = PTR_ERR(ipcp);
2427 + goto out_unlock1;
2428 + }
2429
2430 sma = container_of(ipcp, struct sem_array, sem_perm);
2431
2432 err = security_sem_semctl(sma, cmd);
2433 - if (err) {
2434 - rcu_read_unlock();
2435 - goto out_up;
2436 - }
2437 + if (err)
2438 + goto out_unlock1;
2439
2440 - switch(cmd){
2441 + switch (cmd) {
2442 case IPC_RMID:
2443 sem_lock(sma, NULL, -1);
2444 + /* freeary unlocks the ipc object and rcu */
2445 freeary(ns, ipcp);
2446 goto out_up;
2447 case IPC_SET:
2448 sem_lock(sma, NULL, -1);
2449 err = ipc_update_perm(&semid64.sem_perm, ipcp);
2450 if (err)
2451 - goto out_unlock;
2452 + goto out_unlock0;
2453 sma->sem_ctime = get_seconds();
2454 break;
2455 default:
2456 - rcu_read_unlock();
2457 err = -EINVAL;
2458 - goto out_up;
2459 + goto out_unlock1;
2460 }
2461
2462 -out_unlock:
2463 +out_unlock0:
2464 sem_unlock(sma, -1);
2465 +out_unlock1:
2466 rcu_read_unlock();
2467 out_up:
2468 - up_write(&sem_ids(ns).rw_mutex);
2469 + up_write(&sem_ids(ns).rwsem);
2470 return err;
2471 }
2472
2473 @@ -1466,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
2474 /* step 2: allocate new undo structure */
2475 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
2476 if (!new) {
2477 - sem_putref(sma);
2478 + ipc_rcu_putref(sma, ipc_rcu_free);
2479 return ERR_PTR(-ENOMEM);
2480 }
2481
2482 @@ -1496,7 +1714,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
2483 new->semid = semid;
2484 assert_spin_locked(&ulp->lock);
2485 list_add_rcu(&new->list_proc, &ulp->list_proc);
2486 - assert_spin_locked(&sma->sem_perm.lock);
2487 + ipc_assert_locked_object(&sma->sem_perm);
2488 list_add(&new->list_id, &sma->list_id);
2489 un = new;
2490
2491 @@ -1533,7 +1751,6 @@ static int get_queue_result(struct sem_queue *q)
2492 return error;
2493 }
2494
2495 -
2496 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2497 unsigned, nsops, const struct timespec __user *, timeout)
2498 {
2499 @@ -1631,13 +1848,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2500 if (un && un->semid == -1)
2501 goto out_unlock_free;
2502
2503 - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
2504 - if (error <= 0) {
2505 - if (alter && error == 0)
2506 + error = perform_atomic_semop(sma, sops, nsops, un,
2507 + task_tgid_vnr(current));
2508 + if (error == 0) {
2509 + /* If the operation was successful, then do
2510 + * the required updates.
2511 + */
2512 + if (alter)
2513 do_smart_update(sma, sops, nsops, 1, &tasks);
2514 -
2515 - goto out_unlock_free;
2516 + else
2517 + set_semotime(sma, sops);
2518 }
2519 + if (error <= 0)
2520 + goto out_unlock_free;
2521
2522 /* We need to sleep on this operation, so we put the current
2523 * task into the pending queue and go to sleep.
2524 @@ -1653,15 +1876,27 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2525 struct sem *curr;
2526 curr = &sma->sem_base[sops->sem_num];
2527
2528 - if (alter)
2529 - list_add_tail(&queue.list, &curr->sem_pending);
2530 - else
2531 - list_add(&queue.list, &curr->sem_pending);
2532 + if (alter) {
2533 + if (sma->complex_count) {
2534 + list_add_tail(&queue.list,
2535 + &sma->pending_alter);
2536 + } else {
2537 +
2538 + list_add_tail(&queue.list,
2539 + &curr->pending_alter);
2540 + }
2541 + } else {
2542 + list_add_tail(&queue.list, &curr->pending_const);
2543 + }
2544 } else {
2545 + if (!sma->complex_count)
2546 + merge_queues(sma);
2547 +
2548 if (alter)
2549 - list_add_tail(&queue.list, &sma->sem_pending);
2550 + list_add_tail(&queue.list, &sma->pending_alter);
2551 else
2552 - list_add(&queue.list, &sma->sem_pending);
2553 + list_add_tail(&queue.list, &sma->pending_const);
2554 +
2555 sma->complex_count++;
2556 }
2557
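A sleeping request is queued on a per-semaphore list only when it is simple (nsops == 1) and no complex operation is pending; anything else goes on the array-wide lists and bumps complex_count, which in turn triggers merge_queues() and the global lock. A classic operation that takes the complex path is the two-element "wait for zero, then mark busy" sequence; the helper below is hypothetical, the semantics are standard semop(2):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/*
 * Hypothetical helper: atomically wait until semaphore 0 is free (value 0),
 * then set it to 1.  Because nsops == 2 this is a "complex" operation: the
 * kernel takes the global lock and, if it must sleep, queues the request on
 * the array-wide pending lists instead of a per-semaphore list.
 */
static int acquire_busy_flag(int semid)
{
        struct sembuf ops[2] = {
                { .sem_num = 0, .sem_op = 0, .sem_flg = 0 },       /* wait for zero */
                { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO } /* then mark busy */
        };
        return semop(semid, ops, 2);
}

int main(void)
{
        int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        if (id < 0 || acquire_busy_flag(id) == -1)
                perror("semget/semop");
        if (id >= 0)
                semctl(id, 0, IPC_RMID);
        return 0;
}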
2558 @@ -1833,7 +2068,7 @@ void exit_sem(struct task_struct *tsk)
2559 }
2560
2561 /* remove un from the linked lists */
2562 - assert_spin_locked(&sma->sem_perm.lock);
2563 + ipc_assert_locked_object(&sma->sem_perm);
2564 list_del(&un->list_id);
2565
2566 spin_lock(&ulp->lock);
2567 @@ -1882,6 +2117,17 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2568 {
2569 struct user_namespace *user_ns = seq_user_ns(s);
2570 struct sem_array *sma = it;
2571 + time_t sem_otime;
2572 +
2573 + /*
2574 + * The proc interface isn't aware of sem_lock(), it calls
2575 + * ipc_lock_object() directly (in sysvipc_find_ipc).
2576 + * In order to stay compatible with sem_lock(), we must wait until
2577 + * all simple semop() calls have left their critical regions.
2578 + */
2579 + sem_wait_array(sma);
2580 +
2581 + sem_otime = get_semotime(sma);
2582
2583 return seq_printf(s,
2584 "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
2585 @@ -1893,7 +2139,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2586 from_kgid_munged(user_ns, sma->sem_perm.gid),
2587 from_kuid_munged(user_ns, sma->sem_perm.cuid),
2588 from_kgid_munged(user_ns, sma->sem_perm.cgid),
2589 - sma->sem_otime,
2590 + sem_otime,
2591 sma->sem_ctime);
2592 }
2593 #endif
2594 diff --git a/ipc/shm.c b/ipc/shm.c
2595 index 7e199fa..7b87bea 100644
2596 --- a/ipc/shm.c
2597 +++ b/ipc/shm.c
2598 @@ -19,6 +19,9 @@
2599 * namespaces support
2600 * OpenVZ, SWsoft Inc.
2601 * Pavel Emelianov <xemul@openvz.org>
2602 + *
2603 + * Better ipc lock (kern_ipc_perm.lock) handling
2604 + * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
2605 */
2606
2607 #include <linux/slab.h>
2608 @@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns)
2609 }
2610
2611 /*
2612 - * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
2613 - * Only shm_ids.rw_mutex remains locked on exit.
2614 + * Called with shm_ids.rwsem (writer) and the shp structure locked.
2615 + * Only shm_ids.rwsem remains locked on exit.
2616 */
2617 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
2618 {
2619 @@ -124,8 +127,28 @@ void __init shm_init (void)
2620 IPC_SHM_IDS, sysvipc_shm_proc_show);
2621 }
2622
2623 +static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
2624 +{
2625 + struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
2626 +
2627 + if (IS_ERR(ipcp))
2628 + return ERR_CAST(ipcp);
2629 +
2630 + return container_of(ipcp, struct shmid_kernel, shm_perm);
2631 +}
2632 +
2633 +static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
2634 +{
2635 + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
2636 +
2637 + if (IS_ERR(ipcp))
2638 + return ERR_CAST(ipcp);
2639 +
2640 + return container_of(ipcp, struct shmid_kernel, shm_perm);
2641 +}
2642 +
2643 /*
2644 - * shm_lock_(check_) routines are called in the paths where the rw_mutex
2645 + * shm_lock_(check_) routines are called in the paths where the rwsem
2646 * is not necessarily held.
2647 */
2648 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
2649 @@ -141,18 +164,16 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
2650 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
2651 {
2652 rcu_read_lock();
2653 - spin_lock(&ipcp->shm_perm.lock);
2654 + ipc_lock_object(&ipcp->shm_perm);
2655 }
2656
2657 -static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
2658 - int id)
2659 +static void shm_rcu_free(struct rcu_head *head)
2660 {
2661 - struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
2662 -
2663 - if (IS_ERR(ipcp))
2664 - return (struct shmid_kernel *)ipcp;
2665 + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
2666 + struct shmid_kernel *shp = ipc_rcu_to_struct(p);
2667
2668 - return container_of(ipcp, struct shmid_kernel, shm_perm);
2669 + security_shm_free(shp);
2670 + ipc_rcu_free(head);
2671 }
2672
2673 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
2674 @@ -182,7 +203,7 @@ static void shm_open(struct vm_area_struct *vma)
2675 * @ns: namespace
2676 * @shp: struct to free
2677 *
2678 - * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
2679 + * It has to be called with shp and shm_ids.rwsem (writer) locked,
2680 * but returns with shp unlocked and freed.
2681 */
2682 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
2683 @@ -196,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
2684 user_shm_unlock(file_inode(shp->shm_file)->i_size,
2685 shp->mlock_user);
2686 fput (shp->shm_file);
2687 - security_shm_free(shp);
2688 - ipc_rcu_putref(shp);
2689 + ipc_rcu_putref(shp, shm_rcu_free);
2690 }
2691
2692 /*
2693 @@ -230,7 +250,7 @@ static void shm_close(struct vm_area_struct *vma)
2694 struct shmid_kernel *shp;
2695 struct ipc_namespace *ns = sfd->ns;
2696
2697 - down_write(&shm_ids(ns).rw_mutex);
2698 + down_write(&shm_ids(ns).rwsem);
2699 /* remove from the list of attaches of the shm segment */
2700 shp = shm_lock(ns, sfd->id);
2701 BUG_ON(IS_ERR(shp));
2702 @@ -241,10 +261,10 @@ static void shm_close(struct vm_area_struct *vma)
2703 shm_destroy(ns, shp);
2704 else
2705 shm_unlock(shp);
2706 - up_write(&shm_ids(ns).rw_mutex);
2707 + up_write(&shm_ids(ns).rwsem);
2708 }
2709
2710 -/* Called with ns->shm_ids(ns).rw_mutex locked */
2711 +/* Called with ns->shm_ids(ns).rwsem locked */
2712 static int shm_try_destroy_current(int id, void *p, void *data)
2713 {
2714 struct ipc_namespace *ns = data;
2715 @@ -275,7 +295,7 @@ static int shm_try_destroy_current(int id, void *p, void *data)
2716 return 0;
2717 }
2718
2719 -/* Called with ns->shm_ids(ns).rw_mutex locked */
2720 +/* Called with ns->shm_ids(ns).rwsem locked */
2721 static int shm_try_destroy_orphaned(int id, void *p, void *data)
2722 {
2723 struct ipc_namespace *ns = data;
2724 @@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
2725 * We want to destroy segments without users and with already
2726 * exit'ed originating process.
2727 *
2728 - * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
2729 + * As shp->* are changed under rwsem, it's safe to skip shp locking.
2730 */
2731 if (shp->shm_creator != NULL)
2732 return 0;
2733 @@ -300,10 +320,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
2734
2735 void shm_destroy_orphaned(struct ipc_namespace *ns)
2736 {
2737 - down_write(&shm_ids(ns).rw_mutex);
2738 + down_write(&shm_ids(ns).rwsem);
2739 if (shm_ids(ns).in_use)
2740 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
2741 - up_write(&shm_ids(ns).rw_mutex);
2742 + up_write(&shm_ids(ns).rwsem);
2743 }
2744
2745
2746 @@ -315,10 +335,10 @@ void exit_shm(struct task_struct *task)
2747 return;
2748
2749 /* Destroy all already created segments, but not mapped yet */
2750 - down_write(&shm_ids(ns).rw_mutex);
2751 + down_write(&shm_ids(ns).rwsem);
2752 if (shm_ids(ns).in_use)
2753 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
2754 - up_write(&shm_ids(ns).rw_mutex);
2755 + up_write(&shm_ids(ns).rwsem);
2756 }
2757
2758 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2759 @@ -452,7 +472,7 @@ static const struct vm_operations_struct shm_vm_ops = {
2760 * @ns: namespace
2761 * @params: ptr to the structure that contains key, size and shmflg
2762 *
2763 - * Called with shm_ids.rw_mutex held as a writer.
2764 + * Called with shm_ids.rwsem held as a writer.
2765 */
2766
2767 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
2768 @@ -485,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
2769 shp->shm_perm.security = NULL;
2770 error = security_shm_alloc(shp);
2771 if (error) {
2772 - ipc_rcu_putref(shp);
2773 + ipc_rcu_putref(shp, ipc_rcu_free);
2774 return error;
2775 }
2776
2777 @@ -535,6 +555,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
2778 shp->shm_nattch = 0;
2779 shp->shm_file = file;
2780 shp->shm_creator = current;
2781 +
2782 /*
2783 * shmid gets reported as "inode#" in /proc/pid/maps.
2784 * proc-ps tools use this. Changing this will break them.
2785 @@ -543,7 +564,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
2786
2787 ns->shm_tot += numpages;
2788 error = shp->shm_perm.id;
2789 - shm_unlock(shp);
2790 +
2791 + ipc_unlock_object(&shp->shm_perm);
2792 + rcu_read_unlock();
2793 return error;
2794
2795 no_id:
2796 @@ -551,13 +574,12 @@ no_id:
2797 user_shm_unlock(size, shp->mlock_user);
2798 fput(file);
2799 no_file:
2800 - security_shm_free(shp);
2801 - ipc_rcu_putref(shp);
2802 + ipc_rcu_putref(shp, shm_rcu_free);
2803 return error;
2804 }
2805
2806 /*
2807 - * Called with shm_ids.rw_mutex and ipcp locked.
2808 + * Called with shm_ids.rwsem and ipcp locked.
2809 */
2810 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
2811 {
2812 @@ -568,7 +590,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
2813 }
2814
2815 /*
2816 - * Called with shm_ids.rw_mutex and ipcp locked.
2817 + * Called with shm_ids.rwsem and ipcp locked.
2818 */
2819 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
2820 struct ipc_params *params)
2821 @@ -681,7 +703,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
2822
2823 /*
2824 * Calculate and add used RSS and swap pages of a shm.
2825 - * Called with shm_ids.rw_mutex held as a reader
2826 + * Called with shm_ids.rwsem held as a reader
2827 */
2828 static void shm_add_rss_swap(struct shmid_kernel *shp,
2829 unsigned long *rss_add, unsigned long *swp_add)
2830 @@ -708,7 +730,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
2831 }
2832
2833 /*
2834 - * Called with shm_ids.rw_mutex held as a reader
2835 + * Called with shm_ids.rwsem held as a reader
2836 */
2837 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
2838 unsigned long *swp)
2839 @@ -737,9 +759,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
2840 }
2841
2842 /*
2843 - * This function handles some shmctl commands which require the rw_mutex
2844 + * This function handles some shmctl commands which require the rwsem
2845 * to be held in write mode.
2846 - * NOTE: no locks must be held, the rw_mutex is taken inside this function.
2847 + * NOTE: no locks must be held, the rwsem is taken inside this function.
2848 */
2849 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
2850 struct shmid_ds __user *buf, int version)
2851 @@ -754,59 +776,67 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
2852 return -EFAULT;
2853 }
2854
2855 - ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
2856 - &shmid64.shm_perm, 0);
2857 - if (IS_ERR(ipcp))
2858 - return PTR_ERR(ipcp);
2859 + down_write(&shm_ids(ns).rwsem);
2860 + rcu_read_lock();
2861 +
2862 + ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
2863 + &shmid64.shm_perm, 0);
2864 + if (IS_ERR(ipcp)) {
2865 + err = PTR_ERR(ipcp);
2866 + goto out_unlock1;
2867 + }
2868
2869 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
2870
2871 err = security_shm_shmctl(shp, cmd);
2872 if (err)
2873 - goto out_unlock;
2874 + goto out_unlock1;
2875 +
2876 switch (cmd) {
2877 case IPC_RMID:
2878 + ipc_lock_object(&shp->shm_perm);
2879 + /* do_shm_rmid unlocks the ipc object and rcu */
2880 do_shm_rmid(ns, ipcp);
2881 goto out_up;
2882 case IPC_SET:
2883 + ipc_lock_object(&shp->shm_perm);
2884 err = ipc_update_perm(&shmid64.shm_perm, ipcp);
2885 if (err)
2886 - goto out_unlock;
2887 + goto out_unlock0;
2888 shp->shm_ctim = get_seconds();
2889 break;
2890 default:
2891 err = -EINVAL;
2892 + goto out_unlock1;
2893 }
2894 -out_unlock:
2895 - shm_unlock(shp);
2896 +
2897 +out_unlock0:
2898 + ipc_unlock_object(&shp->shm_perm);
2899 +out_unlock1:
2900 + rcu_read_unlock();
2901 out_up:
2902 - up_write(&shm_ids(ns).rw_mutex);
2903 + up_write(&shm_ids(ns).rwsem);
2904 return err;
2905 }
2906
2907 -SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
2908 +static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
2909 + int cmd, int version, void __user *buf)
2910 {
2911 + int err;
2912 struct shmid_kernel *shp;
2913 - int err, version;
2914 - struct ipc_namespace *ns;
2915
2916 - if (cmd < 0 || shmid < 0) {
2917 - err = -EINVAL;
2918 - goto out;
2919 + /* preliminary security checks for *_INFO */
2920 + if (cmd == IPC_INFO || cmd == SHM_INFO) {
2921 + err = security_shm_shmctl(NULL, cmd);
2922 + if (err)
2923 + return err;
2924 }
2925
2926 - version = ipc_parse_version(&cmd);
2927 - ns = current->nsproxy->ipc_ns;
2928 -
2929 - switch (cmd) { /* replace with proc interface ? */
2930 + switch (cmd) {
2931 case IPC_INFO:
2932 {
2933 struct shminfo64 shminfo;
2934
2935 - err = security_shm_shmctl(NULL, cmd);
2936 - if (err)
2937 - return err;
2938 -
2939 memset(&shminfo, 0, sizeof(shminfo));
2940 shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
2941 shminfo.shmmax = ns->shm_ctlmax;
2942 @@ -816,9 +846,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
2943 if(copy_shminfo_to_user (buf, &shminfo, version))
2944 return -EFAULT;
2945
2946 - down_read(&shm_ids(ns).rw_mutex);
2947 + down_read(&shm_ids(ns).rwsem);
2948 err = ipc_get_maxid(&shm_ids(ns));
2949 - up_read(&shm_ids(ns).rw_mutex);
2950 + up_read(&shm_ids(ns).rwsem);
2951
2952 if(err<0)
2953 err = 0;
2954 @@ -828,19 +858,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
2955 {
2956 struct shm_info shm_info;
2957
2958 - err = security_shm_shmctl(NULL, cmd);
2959 - if (err)
2960 - return err;
2961 -
2962 memset(&shm_info, 0, sizeof(shm_info));
2963 - down_read(&shm_ids(ns).rw_mutex);
2964 + down_read(&shm_ids(ns).rwsem);
2965 shm_info.used_ids = shm_ids(ns).in_use;
2966 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
2967 shm_info.shm_tot = ns->shm_tot;
2968 shm_info.swap_attempts = 0;
2969 shm_info.swap_successes = 0;
2970 err = ipc_get_maxid(&shm_ids(ns));
2971 - up_read(&shm_ids(ns).rw_mutex);
2972 + up_read(&shm_ids(ns).rwsem);
2973 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
2974 err = -EFAULT;
2975 goto out;
2976 @@ -855,27 +881,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
2977 struct shmid64_ds tbuf;
2978 int result;
2979
2980 + rcu_read_lock();
2981 if (cmd == SHM_STAT) {
2982 - shp = shm_lock(ns, shmid);
2983 + shp = shm_obtain_object(ns, shmid);
2984 if (IS_ERR(shp)) {
2985 err = PTR_ERR(shp);
2986 - goto out;
2987 + goto out_unlock;
2988 }
2989 result = shp->shm_perm.id;
2990 } else {
2991 - shp = shm_lock_check(ns, shmid);
2992 + shp = shm_obtain_object_check(ns, shmid);
2993 if (IS_ERR(shp)) {
2994 err = PTR_ERR(shp);
2995 - goto out;
2996 + goto out_unlock;
2997 }
2998 result = 0;
2999 }
3000 +
3001 err = -EACCES;
3002 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
3003 goto out_unlock;
3004 +
3005 err = security_shm_shmctl(shp, cmd);
3006 if (err)
3007 goto out_unlock;
3008 +
3009 memset(&tbuf, 0, sizeof(tbuf));
3010 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
3011 tbuf.shm_segsz = shp->shm_segsz;
3012 @@ -885,43 +915,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
3013 tbuf.shm_cpid = shp->shm_cprid;
3014 tbuf.shm_lpid = shp->shm_lprid;
3015 tbuf.shm_nattch = shp->shm_nattch;
3016 - shm_unlock(shp);
3017 - if(copy_shmid_to_user (buf, &tbuf, version))
3018 + rcu_read_unlock();
3019 +
3020 + if (copy_shmid_to_user(buf, &tbuf, version))
3021 err = -EFAULT;
3022 else
3023 err = result;
3024 goto out;
3025 }
3026 + default:
3027 + return -EINVAL;
3028 + }
3029 +
3030 +out_unlock:
3031 + rcu_read_unlock();
3032 +out:
3033 + return err;
3034 +}
3035 +
3036 +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
3037 +{
3038 + struct shmid_kernel *shp;
3039 + int err, version;
3040 + struct ipc_namespace *ns;
3041 +
3042 + if (cmd < 0 || shmid < 0)
3043 + return -EINVAL;
3044 +
3045 + version = ipc_parse_version(&cmd);
3046 + ns = current->nsproxy->ipc_ns;
3047 +
3048 + switch (cmd) {
3049 + case IPC_INFO:
3050 + case SHM_INFO:
3051 + case SHM_STAT:
3052 + case IPC_STAT:
3053 + return shmctl_nolock(ns, shmid, cmd, version, buf);
3054 + case IPC_RMID:
3055 + case IPC_SET:
3056 + return shmctl_down(ns, shmid, cmd, buf, version);
3057 case SHM_LOCK:
3058 case SHM_UNLOCK:
3059 {
3060 struct file *shm_file;
3061
3062 - shp = shm_lock_check(ns, shmid);
3063 + rcu_read_lock();
3064 + shp = shm_obtain_object_check(ns, shmid);
3065 if (IS_ERR(shp)) {
3066 err = PTR_ERR(shp);
3067 - goto out;
3068 + goto out_unlock1;
3069 }
3070
3071 audit_ipc_obj(&(shp->shm_perm));
3072 + err = security_shm_shmctl(shp, cmd);
3073 + if (err)
3074 + goto out_unlock1;
3075
3076 + ipc_lock_object(&shp->shm_perm);
3077 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
3078 kuid_t euid = current_euid();
3079 err = -EPERM;
3080 if (!uid_eq(euid, shp->shm_perm.uid) &&
3081 !uid_eq(euid, shp->shm_perm.cuid))
3082 - goto out_unlock;
3083 + goto out_unlock0;
3084 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
3085 - goto out_unlock;
3086 + goto out_unlock0;
3087 }
3088
3089 - err = security_shm_shmctl(shp, cmd);
3090 - if (err)
3091 - goto out_unlock;
3092 -
3093 shm_file = shp->shm_file;
3094 if (is_file_hugepages(shm_file))
3095 - goto out_unlock;
3096 + goto out_unlock0;
3097
3098 if (cmd == SHM_LOCK) {
3099 struct user_struct *user = current_user();
3100 @@ -930,32 +993,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
3101 shp->shm_perm.mode |= SHM_LOCKED;
3102 shp->mlock_user = user;
3103 }
3104 - goto out_unlock;
3105 + goto out_unlock0;
3106 }
3107
3108 /* SHM_UNLOCK */
3109 if (!(shp->shm_perm.mode & SHM_LOCKED))
3110 - goto out_unlock;
3111 + goto out_unlock0;
3112 shmem_lock(shm_file, 0, shp->mlock_user);
3113 shp->shm_perm.mode &= ~SHM_LOCKED;
3114 shp->mlock_user = NULL;
3115 get_file(shm_file);
3116 - shm_unlock(shp);
3117 + ipc_unlock_object(&shp->shm_perm);
3118 + rcu_read_unlock();
3119 shmem_unlock_mapping(shm_file->f_mapping);
3120 +
3121 fput(shm_file);
3122 - goto out;
3123 - }
3124 - case IPC_RMID:
3125 - case IPC_SET:
3126 - err = shmctl_down(ns, shmid, cmd, buf, version);
3127 return err;
3128 + }
3129 default:
3130 return -EINVAL;
3131 }
3132
3133 -out_unlock:
3134 - shm_unlock(shp);
3135 -out:
3136 +out_unlock0:
3137 + ipc_unlock_object(&shp->shm_perm);
3138 +out_unlock1:
3139 + rcu_read_unlock();
3140 return err;
3141 }
3142
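The SHM_LOCK/SHM_UNLOCK branch above keeps its permission model: without CAP_IPC_LOCK the caller must be the owner or creator and, for SHM_LOCK, have a non-zero RLIMIT_MEMLOCK. From user space the sequence is unchanged by this refactor; an illustrative program:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
        if (id < 0) { perror("shmget"); return 1; }

        /* pin the segment in memory; may fail with EPERM/ENOMEM depending
         * on CAP_IPC_LOCK and RLIMIT_MEMLOCK, exactly as checked above */
        if (shmctl(id, SHM_LOCK, NULL) == -1)
                perror("shmctl(SHM_LOCK)");

        char *p = shmat(id, NULL, 0);
        if (p == (void *)-1)
                perror("shmat");
        else
                shmdt(p);

        shmctl(id, IPC_RMID, NULL);
        return 0;
}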
3143 @@ -1023,10 +1085,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
3144 * additional creator id...
3145 */
3146 ns = current->nsproxy->ipc_ns;
3147 - shp = shm_lock_check(ns, shmid);
3148 + rcu_read_lock();
3149 + shp = shm_obtain_object_check(ns, shmid);
3150 if (IS_ERR(shp)) {
3151 err = PTR_ERR(shp);
3152 - goto out;
3153 + goto out_unlock;
3154 }
3155
3156 err = -EACCES;
3157 @@ -1037,24 +1100,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
3158 if (err)
3159 goto out_unlock;
3160
3161 + ipc_lock_object(&shp->shm_perm);
3162 path = shp->shm_file->f_path;
3163 path_get(&path);
3164 shp->shm_nattch++;
3165 size = i_size_read(path.dentry->d_inode);
3166 - shm_unlock(shp);
3167 + ipc_unlock_object(&shp->shm_perm);
3168 + rcu_read_unlock();
3169
3170 err = -ENOMEM;
3171 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
3172 - if (!sfd)
3173 - goto out_put_dentry;
3174 + if (!sfd) {
3175 + path_put(&path);
3176 + goto out_nattch;
3177 + }
3178
3179 file = alloc_file(&path, f_mode,
3180 is_file_hugepages(shp->shm_file) ?
3181 &shm_file_operations_huge :
3182 &shm_file_operations);
3183 err = PTR_ERR(file);
3184 - if (IS_ERR(file))
3185 - goto out_free;
3186 + if (IS_ERR(file)) {
3187 + kfree(sfd);
3188 + path_put(&path);
3189 + goto out_nattch;
3190 + }
3191
3192 file->private_data = sfd;
3193 file->f_mapping = shp->shm_file->f_mapping;
3194 @@ -1080,7 +1150,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
3195 addr > current->mm->start_stack - size - PAGE_SIZE * 5)
3196 goto invalid;
3197 }
3198 -
3199 +
3200 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
3201 *raddr = addr;
3202 err = 0;
3203 @@ -1095,7 +1165,7 @@ out_fput:
3204 fput(file);
3205
3206 out_nattch:
3207 - down_write(&shm_ids(ns).rw_mutex);
3208 + down_write(&shm_ids(ns).rwsem);
3209 shp = shm_lock(ns, shmid);
3210 BUG_ON(IS_ERR(shp));
3211 shp->shm_nattch--;
3212 @@ -1103,20 +1173,13 @@ out_nattch:
3213 shm_destroy(ns, shp);
3214 else
3215 shm_unlock(shp);
3216 - up_write(&shm_ids(ns).rw_mutex);
3217 -
3218 -out:
3219 + up_write(&shm_ids(ns).rwsem);
3220 return err;
3221
3222 out_unlock:
3223 - shm_unlock(shp);
3224 - goto out;
3225 -
3226 -out_free:
3227 - kfree(sfd);
3228 -out_put_dentry:
3229 - path_put(&path);
3230 - goto out_nattch;
3231 + rcu_read_unlock();
3232 +out:
3233 + return err;
3234 }
3235
3236 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
3237 @@ -1221,8 +1284,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
3238 #else /* CONFIG_MMU */
3239 /* under NOMMU conditions, the exact address to be destroyed must be
3240 * given */
3241 - retval = -EINVAL;
3242 - if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
3243 + if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
3244 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
3245 retval = 0;
3246 }
3247 diff --git a/ipc/util.c b/ipc/util.c
3248 index 809ec5e..fdb8ae7 100644
3249 --- a/ipc/util.c
3250 +++ b/ipc/util.c
3251 @@ -15,6 +15,14 @@
3252 * Jun 2006 - namespaces ssupport
3253 * OpenVZ, SWsoft Inc.
3254 * Pavel Emelianov <xemul@openvz.org>
3255 + *
3256 + * General sysv ipc locking scheme:
3257 + * when doing ipc id lookups, take the ids->rwsem
3258 + * rcu_read_lock()
3259 + * obtain the ipc object (kern_ipc_perm)
3260 + * perform security, capabilities, auditing and permission checks, etc.
3261 + *	    acquire the ipc lock (kern_ipc_perm.lock) through ipc_lock_object()
3262 + *	      perform data updates (i.e. SET, RMID, LOCK/UNLOCK commands)
3263 */
3264
3265 #include <linux/mm.h>
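Spelled out with the helpers this patch introduces, the comment above corresponds roughly to the following flow; this is a non-compilable sketch of the write-side path, patterned on the semctl_down()/shmctl_down() hunks earlier in this patch, with error handling elided:

        down_write(&ids->rwsem);        /* only for ops that add/remove ids or
                                         * otherwise need the write side */
        rcu_read_lock();

        ipcp = ipc_obtain_object_check(ids, id);   /* no spinlock taken yet */

        /* security, capability, auditing and permission checks run under
         * RCU only, without the per-object lock */

        ipc_lock_object(ipcp);          /* kern_ipc_perm.lock */
        /* ... data updates: SET, RMID, LOCK/UNLOCK commands ... */
        ipc_unlock_object(ipcp);

        rcu_read_unlock();
        up_write(&ids->rwsem);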
3266 @@ -119,7 +127,7 @@ __initcall(ipc_init);
3267
3268 void ipc_init_ids(struct ipc_ids *ids)
3269 {
3270 - init_rwsem(&ids->rw_mutex);
3271 + init_rwsem(&ids->rwsem);
3272
3273 ids->in_use = 0;
3274 ids->seq = 0;
3275 @@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
3276 * @ids: Identifier set
3277 * @key: The key to find
3278 *
3279 - * Requires ipc_ids.rw_mutex locked.
3280 + * Requires ipc_ids.rwsem locked.
3281 * Returns the LOCKED pointer to the ipc structure if found or NULL
3282 * if not.
3283 * If key is found ipc points to the owning ipc structure
3284 @@ -197,7 +205,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
3285 continue;
3286 }
3287
3288 - ipc_lock_by_ptr(ipc);
3289 + rcu_read_lock();
3290 + ipc_lock_object(ipc);
3291 return ipc;
3292 }
3293
3294 @@ -208,7 +217,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
3295 * ipc_get_maxid - get the last assigned id
3296 * @ids: IPC identifier set
3297 *
3298 - * Called with ipc_ids.rw_mutex held.
3299 + * Called with ipc_ids.rwsem held.
3300 */
3301
3302 int ipc_get_maxid(struct ipc_ids *ids)
3303 @@ -246,9 +255,8 @@ int ipc_get_maxid(struct ipc_ids *ids)
3304 * is returned. The 'new' entry is returned in a locked state on success.
3305 * On failure the entry is not locked and a negative err-code is returned.
3306 *
3307 - * Called with ipc_ids.rw_mutex held as a writer.
3308 + * Called with writer ipc_ids.rwsem held.
3309 */
3310 -
3311 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
3312 {
3313 kuid_t euid;
3314 @@ -313,9 +321,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
3315 {
3316 int err;
3317
3318 - down_write(&ids->rw_mutex);
3319 + down_write(&ids->rwsem);
3320 err = ops->getnew(ns, params);
3321 - up_write(&ids->rw_mutex);
3322 + up_write(&ids->rwsem);
3323 return err;
3324 }
3325
3326 @@ -332,7 +340,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
3327 *
3328 * On success, the IPC id is returned.
3329 *
3330 - * It is called with ipc_ids.rw_mutex and ipcp->lock held.
3331 + * It is called with ipc_ids.rwsem and ipcp->lock held.
3332 */
3333 static int ipc_check_perms(struct ipc_namespace *ns,
3334 struct kern_ipc_perm *ipcp,
3335 @@ -377,7 +385,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
3336 * Take the lock as a writer since we are potentially going to add
3337 * a new entry + read locks are not "upgradable"
3338 */
3339 - down_write(&ids->rw_mutex);
3340 + down_write(&ids->rwsem);
3341 ipcp = ipc_findkey(ids, params->key);
3342 if (ipcp == NULL) {
3343 /* key not used */
3344 @@ -403,7 +411,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
3345 }
3346 ipc_unlock(ipcp);
3347 }
3348 - up_write(&ids->rw_mutex);
3349 + up_write(&ids->rwsem);
3350
3351 return err;
3352 }
3353 @@ -414,7 +422,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
3354 * @ids: IPC identifier set
3355 * @ipcp: ipc perm structure containing the identifier to remove
3356 *
3357 - * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
3358 + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
3359 * before this function is called, and remain locked on the exit.
3360 */
3361
3362 @@ -466,13 +474,6 @@ void ipc_free(void* ptr, int size)
3363 kfree(ptr);
3364 }
3365
3366 -struct ipc_rcu {
3367 - struct rcu_head rcu;
3368 - atomic_t refcount;
3369 - /* "void *" makes sure alignment of following data is sane. */
3370 - void *data[0];
3371 -};
3372 -
3373 /**
3374 * ipc_rcu_alloc - allocate ipc and rcu space
3375 * @size: size desired
3376 @@ -489,35 +490,34 @@ void *ipc_rcu_alloc(int size)
3377 if (unlikely(!out))
3378 return NULL;
3379 atomic_set(&out->refcount, 1);
3380 - return out->data;
3381 + return out + 1;
3382 }
3383
3384 int ipc_rcu_getref(void *ptr)
3385 {
3386 - return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount);
3387 -}
3388 + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
3389
3390 -/**
3391 - * ipc_schedule_free - free ipc + rcu space
3392 - * @head: RCU callback structure for queued work
3393 - */
3394 -static void ipc_schedule_free(struct rcu_head *head)
3395 -{
3396 - vfree(container_of(head, struct ipc_rcu, rcu));
3397 + return atomic_inc_not_zero(&p->refcount);
3398 }
3399
3400 -void ipc_rcu_putref(void *ptr)
3401 +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
3402 {
3403 - struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data);
3404 + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
3405
3406 if (!atomic_dec_and_test(&p->refcount))
3407 return;
3408
3409 - if (is_vmalloc_addr(ptr)) {
3410 - call_rcu(&p->rcu, ipc_schedule_free);
3411 - } else {
3412 - kfree_rcu(p, rcu);
3413 - }
3414 + call_rcu(&p->rcu, func);
3415 +}
3416 +
3417 +void ipc_rcu_free(struct rcu_head *head)
3418 +{
3419 + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
3420 +
3421 + if (is_vmalloc_addr(p))
3422 + vfree(p);
3423 + else
3424 + kfree(p);
3425 }
3426
3427 /**
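The hunk above replaces the old data[0] trick and ipc_schedule_free() with a refcounted header that sits immediately in front of the payload, and every final ipc_rcu_putref() now supplies its own RCU callback. A minimal sketch of the resulting usage pattern, with a hypothetical payload type, teardown helper and callback name (the real per-subsystem callbacks live in the sem/msg/shm parts of this patch, not shown in this section):

	static void example_rcu_free(struct rcu_head *head)		/* illustrative name */
	{
		struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
		struct example_payload *obj = ipc_rcu_to_struct(p);	/* header -> payload */

		example_payload_teardown(obj);				/* hypothetical helper */
		ipc_rcu_free(head);					/* kfree() or vfree() header + payload */
	}

	/* allocation hands back the payload pointer (out + 1), not the header ... */
	struct example_payload *obj = ipc_rcu_alloc(sizeof(*obj));
	/* ... and the final putref defers the supplied callback through call_rcu() */
	ipc_rcu_putref(obj, example_rcu_free);
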
3428 @@ -622,7 +622,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
3429 }
3430
3431 /**
3432 - * ipc_lock - Lock an ipc structure without rw_mutex held
3433 + * ipc_lock - Lock an ipc structure without rwsem held
3434 * @ids: IPC identifier set
3435 * @id: ipc id to look for
3436 *
3437 @@ -678,22 +678,6 @@ out:
3438 return out;
3439 }
3440
3441 -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
3442 -{
3443 - struct kern_ipc_perm *out;
3444 -
3445 - out = ipc_lock(ids, id);
3446 - if (IS_ERR(out))
3447 - return out;
3448 -
3449 - if (ipc_checkid(out, id)) {
3450 - ipc_unlock(out);
3451 - return ERR_PTR(-EIDRM);
3452 - }
3453 -
3454 - return out;
3455 -}
3456 -
3457 /**
3458 * ipcget - Common sys_*get() code
3459 * @ns : namsepace
3460 @@ -734,7 +718,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
3461 }
3462
3463 /**
3464 - * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
3465 + * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
3466 * @ns: the ipc namespace
3467 * @ids: the table of ids where to look for the ipc
3468 * @id: the id of the ipc to retrieve
3469 @@ -747,39 +731,22 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
3470 * It must be called without any lock held and
3471 * - retrieves the ipc with the given id in the given table.
3472 * - performs some audit and permission check, depending on the given cmd
3473 - * - returns the ipc with both ipc and rw_mutex locks held in case of success
3474 - * or an err-code without any lock held otherwise.
3475 + * - returns a pointer to the ipc object or otherwise, the corresponding error.
3476 + *
3477 + * Call holding the both the rwsem and the rcu read lock.
3478 */
3479 -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
3480 - struct ipc_ids *ids, int id, int cmd,
3481 - struct ipc64_perm *perm, int extra_perm)
3482 -{
3483 - struct kern_ipc_perm *ipcp;
3484 -
3485 - ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
3486 - if (IS_ERR(ipcp))
3487 - goto out;
3488 -
3489 - spin_lock(&ipcp->lock);
3490 -out:
3491 - return ipcp;
3492 -}
3493 -
3494 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
3495 - struct ipc_ids *ids, int id, int cmd,
3496 - struct ipc64_perm *perm, int extra_perm)
3497 + struct ipc_ids *ids, int id, int cmd,
3498 + struct ipc64_perm *perm, int extra_perm)
3499 {
3500 kuid_t euid;
3501 int err = -EPERM;
3502 struct kern_ipc_perm *ipcp;
3503
3504 - down_write(&ids->rw_mutex);
3505 - rcu_read_lock();
3506 -
3507 ipcp = ipc_obtain_object_check(ids, id);
3508 if (IS_ERR(ipcp)) {
3509 err = PTR_ERR(ipcp);
3510 - goto out_up;
3511 + goto err;
3512 }
3513
3514 audit_ipc_obj(ipcp);
3515 @@ -790,16 +757,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
3516 euid = current_euid();
3517 if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) ||
3518 ns_capable(ns->user_ns, CAP_SYS_ADMIN))
3519 - return ipcp;
3520 -
3521 -out_up:
3522 - /*
3523 - * Unsuccessful lookup, unlock and return
3524 - * the corresponding error.
3525 - */
3526 - rcu_read_unlock();
3527 - up_write(&ids->rw_mutex);
3528 -
3529 + return ipcp; /* successful lookup */
3530 +err:
3531 return ERR_PTR(err);
3532 }
3533
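With ipcctl_pre_down() removed, the locking choreography moves into each *ctl() caller: the rwsem and the RCU read lock are taken before the lookup, and the per-object spinlock only around the section that actually needs it. A hedged sketch of the expected caller shape (names illustrative; the concrete semctl/msgctl/shmctl conversions are in other parts of this patch):

	down_write(&ids->rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, &perm64, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock;		/* rwsem and rcu read lock are still held */
	}

	ipc_lock_object(ipcp);			/* spinlock only for the actual update */
	/* ... IPC_SET / IPC_RMID work ... */
	ipc_unlock_object(ipcp);

out_unlock:
	rcu_read_unlock();
	up_write(&ids->rwsem);
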
3534 @@ -856,7 +815,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
3535 ipc = idr_find(&ids->ipcs_idr, pos);
3536 if (ipc != NULL) {
3537 *new_pos = pos + 1;
3538 - ipc_lock_by_ptr(ipc);
3539 + rcu_read_lock();
3540 + ipc_lock_object(ipc);
3541 return ipc;
3542 }
3543 }
3544 @@ -894,7 +854,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
3545 * Take the lock - this will be released by the corresponding
3546 * call to stop().
3547 */
3548 - down_read(&ids->rw_mutex);
3549 + down_read(&ids->rwsem);
3550
3551 /* pos < 0 is invalid */
3552 if (*pos < 0)
3553 @@ -921,7 +881,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
3554
3555 ids = &iter->ns->ids[iface->ids];
3556 /* Release the lock we took in start() */
3557 - up_read(&ids->rw_mutex);
3558 + up_read(&ids->rwsem);
3559 }
3560
3561 static int sysvipc_proc_show(struct seq_file *s, void *it)
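For the /proc/sysvipc/* iterator the change is the rwsem rename plus spelling out the rcu_read_lock()/ipc_lock_object() pair that ipc_lock_by_ptr() used to hide; the bracketing is unchanged. Roughly (sketch only):

	down_read(&ids->rwsem);		/* sysvipc_proc_start()                  */
	rcu_read_lock();		/* per entry, in sysvipc_find_ipc()      */
	ipc_lock_object(ipc);
	/* ... show the entry; ipc_unlock(ipc) later drops spinlock + rcu ...   */
	up_read(&ids->rwsem);		/* sysvipc_proc_stop()                   */
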
3562 diff --git a/ipc/util.h b/ipc/util.h
3563 index 2b0bdd5..f2f5036 100644
3564 --- a/ipc/util.h
3565 +++ b/ipc/util.h
3566 @@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
3567 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
3568 #endif
3569
3570 +struct ipc_rcu {
3571 + struct rcu_head rcu;
3572 + atomic_t refcount;
3573 +} ____cacheline_aligned_in_smp;
3574 +
3575 +#define ipc_rcu_to_struct(p) ((void *)(p+1))
3576 +
3577 /*
3578 * Structure that holds the parameters needed by the ipc operations
3579 * (see after)
3580 @@ -94,10 +101,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
3581 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
3582 #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
3583
3584 -/* must be called with ids->rw_mutex acquired for writing */
3585 +/* must be called with ids->rwsem acquired for writing */
3586 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
3587
3588 -/* must be called with ids->rw_mutex acquired for reading */
3589 +/* must be called with ids->rwsem acquired for reading */
3590 int ipc_get_maxid(struct ipc_ids *);
3591
3592 /* must be called with both locks acquired. */
3593 @@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
3594 */
3595 void* ipc_rcu_alloc(int size);
3596 int ipc_rcu_getref(void *ptr);
3597 -void ipc_rcu_putref(void *ptr);
3598 +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
3599 +void ipc_rcu_free(struct rcu_head *head);
3600
3601 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
3602 struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
3603 @@ -131,9 +139,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
3604 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
3605 struct ipc_ids *ids, int id, int cmd,
3606 struct ipc64_perm *perm, int extra_perm);
3607 -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
3608 - struct ipc_ids *ids, int id, int cmd,
3609 - struct ipc64_perm *perm, int extra_perm);
3610
3611 #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
3612 /* On IA-64, we always use the "64-bit version" of the IPC structures. */
3613 @@ -159,24 +164,27 @@ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
3614 return uid / SEQ_MULTIPLIER != ipcp->seq;
3615 }
3616
3617 -static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
3618 +static inline void ipc_lock_object(struct kern_ipc_perm *perm)
3619 {
3620 - rcu_read_lock();
3621 spin_lock(&perm->lock);
3622 }
3623
3624 -static inline void ipc_unlock(struct kern_ipc_perm *perm)
3625 +static inline void ipc_unlock_object(struct kern_ipc_perm *perm)
3626 {
3627 spin_unlock(&perm->lock);
3628 - rcu_read_unlock();
3629 }
3630
3631 -static inline void ipc_lock_object(struct kern_ipc_perm *perm)
3632 +static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm)
3633 {
3634 - spin_lock(&perm->lock);
3635 + assert_spin_locked(&perm->lock);
3636 +}
3637 +
3638 +static inline void ipc_unlock(struct kern_ipc_perm *perm)
3639 +{
3640 + ipc_unlock_object(perm);
3641 + rcu_read_unlock();
3642 }
3643
3644 -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
3645 struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
3646 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
3647 struct ipc_ops *ops, struct ipc_params *params);
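The header changes above split the RCU read-side section from the per-object spinlock: ipc_lock_object()/ipc_unlock_object() now touch only the spinlock, ipc_unlock() pairs the spinlock release with rcu_read_unlock(), and ipc_assert_locked_object() replaces open-coded assertions. A compact sketch of the intended pairing, assuming an object looked up via ipc_obtain_object_check():

	rcu_read_lock();
	ipc = ipc_obtain_object_check(ids, id);		/* RCU-protected lookup            */
	if (IS_ERR(ipc)) {
		rcu_read_unlock();
		return PTR_ERR(ipc);
	}
	ipc_lock_object(ipc);				/* spin_lock(&ipc->lock) only      */
	ipc_assert_locked_object(ipc);			/* sanity check while held         */
	/* ... critical section ... */
	ipc_unlock(ipc);				/* spinlock release + rcu_read_unlock() */
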
3648 diff --git a/mm/shmem.c b/mm/shmem.c
3649 index 5e6a842..509b393 100644
3650 --- a/mm/shmem.c
3651 +++ b/mm/shmem.c
3652 @@ -2879,14 +2879,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
3653
3654 /* common code */
3655
3656 -static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen)
3657 -{
3658 - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
3659 - dentry->d_name.name);
3660 -}
3661 -
3662 static struct dentry_operations anon_ops = {
3663 - .d_dname = shmem_dname
3664 + .d_dname = simple_dname
3665 };
3666
3667 /**
3668 diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
3669 index b5375ed..aecf088 100644
3670 --- a/sound/pci/hda/patch_hdmi.c
3671 +++ b/sound/pci/hda/patch_hdmi.c
3672 @@ -930,6 +930,14 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec,
3673 }
3674
3675 /*
3676 + * always configure channel mapping, it may have been changed by the
3677 + * user in the meantime
3678 + */
3679 + hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
3680 + channels, per_pin->chmap,
3681 + per_pin->chmap_set);
3682 +
3683 + /*
3684 * sizeof(ai) is used instead of sizeof(*hdmi_ai) or
3685 * sizeof(*dp_ai) to avoid partial match/update problems when
3686 * the user switches between HDMI/DP monitors.
3687 @@ -940,20 +948,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec,
3688 "pin=%d channels=%d\n",
3689 pin_nid,
3690 channels);
3691 - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
3692 - channels, per_pin->chmap,
3693 - per_pin->chmap_set);
3694 hdmi_stop_infoframe_trans(codec, pin_nid);
3695 hdmi_fill_audio_infoframe(codec, pin_nid,
3696 ai.bytes, sizeof(ai));
3697 hdmi_start_infoframe_trans(codec, pin_nid);
3698 - } else {
3699 - /* For non-pcm audio switch, setup new channel mapping
3700 - * accordingly */
3701 - if (per_pin->non_pcm != non_pcm)
3702 - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
3703 - channels, per_pin->chmap,
3704 - per_pin->chmap_set);
3705 }
3706
3707 per_pin->non_pcm = non_pcm;
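The net effect of this hunk is an ordering change: the channel map is now reprogrammed unconditionally, before the check whether the audio infoframe itself needs to be rewritten, instead of only inside that check or on a PCM/non-PCM switch. Schematically (the "infoframe_changed" condition stands in for the existing check, which is outside this hunk):

	hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
				   channels, per_pin->chmap, per_pin->chmap_set);

	if (infoframe_changed) {		/* condition not shown in this hunk */
		hdmi_stop_infoframe_trans(codec, pin_nid);
		hdmi_fill_audio_infoframe(codec, pin_nid, ai.bytes, sizeof(ai));
		hdmi_start_infoframe_trans(codec, pin_nid);
	}
	per_pin->non_pcm = non_pcm;
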
3708 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
3709 index 458cf89..21b6649 100644
3710 --- a/sound/pci/hda/patch_realtek.c
3711 +++ b/sound/pci/hda/patch_realtek.c
3712 @@ -3200,6 +3200,15 @@ static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec,
3713 }
3714 }
3715
3716 +static void alc290_fixup_mono_speakers(struct hda_codec *codec,
3717 + const struct hda_fixup *fix, int action)
3718 +{
3719 + if (action == HDA_FIXUP_ACT_PRE_PROBE)
3720 + /* Remove DAC node 0x03, as it seems to be
3721 + giving mono output */
3722 + snd_hda_override_wcaps(codec, 0x03, 0);
3723 +}
3724 +
3725 enum {
3726 ALC269_FIXUP_SONY_VAIO,
3727 ALC275_FIXUP_SONY_VAIO_GPIO2,
3728 @@ -3223,9 +3232,12 @@ enum {
3729 ALC269_FIXUP_HP_GPIO_LED,
3730 ALC269_FIXUP_INV_DMIC,
3731 ALC269_FIXUP_LENOVO_DOCK,
3732 + ALC286_FIXUP_SONY_MIC_NO_PRESENCE,
3733 ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT,
3734 ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
3735 ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
3736 + ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
3737 + ALC290_FIXUP_MONO_SPEAKERS,
3738 ALC269_FIXUP_HEADSET_MODE,
3739 ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
3740 ALC269_FIXUP_ASUS_X101_FUNC,
3741 @@ -3412,6 +3424,15 @@ static const struct hda_fixup alc269_fixups[] = {
3742 .chained = true,
3743 .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
3744 },
3745 + [ALC269_FIXUP_DELL3_MIC_NO_PRESENCE] = {
3746 + .type = HDA_FIXUP_PINS,
3747 + .v.pins = (const struct hda_pintbl[]) {
3748 + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */
3749 + { }
3750 + },
3751 + .chained = true,
3752 + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
3753 + },
3754 [ALC269_FIXUP_HEADSET_MODE] = {
3755 .type = HDA_FIXUP_FUNC,
3756 .v.func = alc_fixup_headset_mode,
3757 @@ -3420,6 +3441,13 @@ static const struct hda_fixup alc269_fixups[] = {
3758 .type = HDA_FIXUP_FUNC,
3759 .v.func = alc_fixup_headset_mode_no_hp_mic,
3760 },
3761 + [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = {
3762 + .type = HDA_FIXUP_PINS,
3763 + .v.pins = (const struct hda_pintbl[]) {
3764 + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
3765 + { }
3766 + },
3767 + },
3768 [ALC269_FIXUP_ASUS_X101_FUNC] = {
3769 .type = HDA_FIXUP_FUNC,
3770 .v.func = alc269_fixup_x101_headset_mic,
3771 @@ -3477,6 +3505,12 @@ static const struct hda_fixup alc269_fixups[] = {
3772 .type = HDA_FIXUP_FUNC,
3773 .v.func = alc269_fixup_limit_int_mic_boost,
3774 },
3775 + [ALC290_FIXUP_MONO_SPEAKERS] = {
3776 + .type = HDA_FIXUP_FUNC,
3777 + .v.func = alc290_fixup_mono_speakers,
3778 + .chained = true,
3779 + .chain_id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
3780 + },
3781 };
3782
3783 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
3784 @@ -3511,6 +3545,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
3785 SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
3786 SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
3787 SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
3788 + SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS),
3789 SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
3790 SND_PCI_QUIRK(0x103c, 0x18e6, "HP", ALC269_FIXUP_HP_GPIO_LED),
3791 SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1),
3792 @@ -3529,6 +3564,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
3793 SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
3794 SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
3795 SND_PCI_QUIRK(0x1043, 0x8516, "ASUS X101CH", ALC269_FIXUP_ASUS_X101),
3796 + SND_PCI_QUIRK(0x104d, 0x90b6, "Sony VAIO Pro 13", ALC286_FIXUP_SONY_MIC_NO_PRESENCE),
3797 SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIXUP_SONY_VAIO_GPIO2),
3798 SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
3799 SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
3800 @@ -4216,6 +4252,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
3801 SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
3802 SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
3803 SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
3804 + SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4),
3805 SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
3806 SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2),
3807 SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
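The new Dell Vostro 5470 and Sony VAIO Pro 13 entries follow the driver's usual two-step pattern: define a fixup (pin overrides or a function hook, optionally chained to another fixup), then bind a PCI subsystem ID to it in the quirk table. A sketch of an equivalent hypothetical addition, purely for illustration (the enum value, SSID and model string are invented):

	/* 1) the fixup definition, chained to the existing headset-mode handling */
	[ALC269_FIXUP_EXAMPLE_MIC] = {			/* hypothetical enum entry     */
		.type = HDA_FIXUP_PINS,
		.v.pins = (const struct hda_pintbl[]) {
			{ 0x19, 0x01a1913c },		/* headset mic, no jack detect */
			{ }
		},
		.chained = true,
		.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
	},

	/* 2) the quirk-table entry keying the fixup to a subsystem vendor/device ID */
	SND_PCI_QUIRK(0x1028, 0x9abc, "Example laptop", ALC269_FIXUP_EXAMPLE_MIC),
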
3808 diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c
3809 index 0ce90337..cd69a80 100644
3810 --- a/sound/usb/usx2y/usbusx2yaudio.c
3811 +++ b/sound/usb/usx2y/usbusx2yaudio.c
3812 @@ -299,19 +299,6 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y,
3813 usX2Y_clients_stop(usX2Y);
3814 }
3815
3816 -static void usX2Y_error_sequence(struct usX2Ydev *usX2Y,
3817 - struct snd_usX2Y_substream *subs, struct urb *urb)
3818 -{
3819 - snd_printk(KERN_ERR
3820 -"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n"
3821 -"Most probably some urb of usb-frame %i is still missing.\n"
3822 -"Cause could be too long delays in usb-hcd interrupt handling.\n",
3823 - usb_get_current_frame_number(usX2Y->dev),
3824 - subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out",
3825 - usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame);
3826 - usX2Y_clients_stop(usX2Y);
3827 -}
3828 -
3829 static void i_usX2Y_urb_complete(struct urb *urb)
3830 {
3831 struct snd_usX2Y_substream *subs = urb->context;
3832 @@ -328,12 +315,9 @@ static void i_usX2Y_urb_complete(struct urb *urb)
3833 usX2Y_error_urb_status(usX2Y, subs, urb);
3834 return;
3835 }
3836 - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF)))
3837 - subs->completed_urb = urb;
3838 - else {
3839 - usX2Y_error_sequence(usX2Y, subs, urb);
3840 - return;
3841 - }
3842 +
3843 + subs->completed_urb = urb;
3844 +
3845 {
3846 struct snd_usX2Y_substream *capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE],
3847 *playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK];
3848 diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c
3849 index f2a1acd..814d0e8 100644
3850 --- a/sound/usb/usx2y/usx2yhwdeppcm.c
3851 +++ b/sound/usb/usx2y/usx2yhwdeppcm.c
3852 @@ -244,13 +244,8 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb)
3853 usX2Y_error_urb_status(usX2Y, subs, urb);
3854 return;
3855 }
3856 - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF)))
3857 - subs->completed_urb = urb;
3858 - else {
3859 - usX2Y_error_sequence(usX2Y, subs, urb);
3860 - return;
3861 - }
3862
3863 + subs->completed_urb = urb;
3864 capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE];
3865 capsubs2 = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE + 2];
3866 playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK];
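Both usx2y completion handlers receive the same simplification: the strict urb->start_frame vs. wait_iso_frame comparison and its usX2Y_error_sequence() shutdown path are dropped, so any successfully completed URB is accepted. The error handling in each handler now reduces to roughly:

	if (unlikely(urb->status)) {		/* real transfer errors still stop the stream    */
		usX2Y_error_urb_status(usX2Y, subs, urb);
		return;
	}
	subs->completed_urb = urb;		/* no more sequence check against wait_iso_frame */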