Contents of /trunk/kernel-alx/patches-3.10/0116-3.10.17-all-fixes.patch
Parent Directory | Revision Log
Revision 2344 -
(show annotations)
(download)
Mon Dec 16 10:04:39 2013 UTC (10 years, 9 months ago) by niro
File size: 119145 byte(s)
-linux-3.10.17
1 | diff --git a/Makefile b/Makefile |
2 | index e9528d2..5c7d3d6 100644 |
3 | --- a/Makefile |
4 | +++ b/Makefile |
5 | @@ -1,6 +1,6 @@ |
6 | VERSION = 3 |
7 | PATCHLEVEL = 10 |
8 | -SUBLEVEL = 16 |
9 | +SUBLEVEL = 17 |
10 | EXTRAVERSION = |
11 | NAME = TOSSUG Baby Fish |
12 | |
13 | diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h |
14 | index 442ce5d..43de302 100644 |
15 | --- a/arch/arc/include/asm/delay.h |
16 | +++ b/arch/arc/include/asm/delay.h |
17 | @@ -53,11 +53,10 @@ static inline void __udelay(unsigned long usecs) |
18 | { |
19 | unsigned long loops; |
20 | |
21 | - /* (long long) cast ensures 64 bit MPY - real or emulated |
22 | + /* (u64) cast ensures 64 bit MPY - real or emulated |
23 | * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops |
24 | */ |
25 | - loops = ((long long)(usecs * 4295 * HZ) * |
26 | - (long long)(loops_per_jiffy)) >> 32; |
27 | + loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; |
28 | |
29 | __delay(loops); |
30 | } |
31 | diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h |
32 | index 6fc1159..764f1e3 100644 |
33 | --- a/arch/arc/include/asm/sections.h |
34 | +++ b/arch/arc/include/asm/sections.h |
35 | @@ -11,7 +11,6 @@ |
36 | |
37 | #include <asm-generic/sections.h> |
38 | |
39 | -extern char _int_vec_base_lds[]; |
40 | extern char __arc_dccm_base[]; |
41 | extern char __dtb_start[]; |
42 | |
43 | diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h |
44 | index f158197..b6a8c2d 100644 |
45 | --- a/arch/arc/include/asm/spinlock.h |
46 | +++ b/arch/arc/include/asm/spinlock.h |
47 | @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) |
48 | |
49 | static inline void arch_spin_unlock(arch_spinlock_t *lock) |
50 | { |
51 | - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; |
52 | + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; |
53 | + |
54 | + __asm__ __volatile__( |
55 | + " ex %0, [%1] \n" |
56 | + : "+r" (tmp) |
57 | + : "r"(&(lock->slock)) |
58 | + : "memory"); |
59 | + |
60 | smp_mb(); |
61 | } |
62 | |
63 | diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h |
64 | index 3242082..30c9baf 100644 |
65 | --- a/arch/arc/include/asm/uaccess.h |
66 | +++ b/arch/arc/include/asm/uaccess.h |
67 | @@ -43,7 +43,7 @@ |
68 | * Because it essentially checks if buffer end is within limit and @len is |
69 | * non-ngeative, which implies that buffer start will be within limit too. |
70 | * |
71 | - * The reason for rewriting being, for majorit yof cases, @len is generally |
72 | + * The reason for rewriting being, for majority of cases, @len is generally |
73 | * compile time constant, causing first sub-expression to be compile time |
74 | * subsumed. |
75 | * |
76 | @@ -53,7 +53,7 @@ |
77 | * |
78 | */ |
79 | #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ |
80 | - (((addr)+(sz)) <= get_fs())) |
81 | + ((addr) <= (get_fs() - (sz)))) |
82 | #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ |
83 | likely(__user_ok((addr), (sz)))) |
84 | |
85 | diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S |
86 | index 006dec3..0f944f0 100644 |
87 | --- a/arch/arc/kernel/head.S |
88 | +++ b/arch/arc/kernel/head.S |
89 | @@ -27,11 +27,16 @@ stext: |
90 | ; Don't clobber r0-r4 yet. It might have bootloader provided info |
91 | ;------------------------------------------------------------------- |
92 | |
93 | + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] |
94 | + |
95 | #ifdef CONFIG_SMP |
96 | ; Only Boot (Master) proceeds. Others wait in platform dependent way |
97 | ; IDENTITY Reg [ 3 2 1 0 ] |
98 | ; (cpu-id) ^^^ => Zero for UP ARC700 |
99 | ; => #Core-ID if SMP (Master 0) |
100 | + ; Note that non-boot CPUs might not land here if halt-on-reset and |
101 | + ; instead breath life from @first_lines_of_secondary, but we still |
102 | + ; need to make sure only boot cpu takes this path. |
103 | GET_CPU_ID r5 |
104 | cmp r5, 0 |
105 | jnz arc_platform_smp_wait_to_boot |
106 | @@ -96,6 +101,8 @@ stext: |
107 | |
108 | first_lines_of_secondary: |
109 | |
110 | + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] |
111 | + |
112 | ; setup per-cpu idle task as "current" on this CPU |
113 | ld r0, [@secondary_idle_tsk] |
114 | SET_CURR_TASK_ON_CPU r0, r1 |
115 | diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c |
116 | index 8115fa5..a199471 100644 |
117 | --- a/arch/arc/kernel/irq.c |
118 | +++ b/arch/arc/kernel/irq.c |
119 | @@ -24,7 +24,6 @@ |
120 | * -Needed for each CPU (hence not foldable into init_IRQ) |
121 | * |
122 | * what it does ? |
123 | - * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000 |
124 | * -Disable all IRQs (on CPU side) |
125 | * -Optionally, setup the High priority Interrupts as Level 2 IRQs |
126 | */ |
127 | @@ -32,8 +31,6 @@ void __cpuinit arc_init_IRQ(void) |
128 | { |
129 | int level_mask = 0; |
130 | |
131 | - write_aux_reg(AUX_INTR_VEC_BASE, _int_vec_base_lds); |
132 | - |
133 | /* Disable all IRQs: enable them as devices request */ |
134 | write_aux_reg(AUX_IENABLE, 0); |
135 | |
136 | diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c |
137 | index c6a81c5..0851604 100644 |
138 | --- a/arch/arc/kernel/ptrace.c |
139 | +++ b/arch/arc/kernel/ptrace.c |
140 | @@ -92,7 +92,7 @@ static int genregs_set(struct task_struct *target, |
141 | REG_IN_CHUNK(scratch, callee, ptregs); /* pt_regs[bta..orig_r8] */ |
142 | REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ |
143 | REG_IGNORE_ONE(efa); /* efa update invalid */ |
144 | - REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */ |
145 | + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ |
146 | |
147 | return ret; |
148 | } |
149 | diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c |
150 | index b2b3731..2d7786b 100644 |
151 | --- a/arch/arc/kernel/setup.c |
152 | +++ b/arch/arc/kernel/setup.c |
153 | @@ -47,10 +47,7 @@ void __cpuinit read_arc_build_cfg_regs(void) |
154 | READ_BCR(AUX_IDENTITY, cpu->core); |
155 | |
156 | cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); |
157 | - |
158 | cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); |
159 | - if (cpu->vec_base == 0) |
160 | - cpu->vec_base = (unsigned int)_int_vec_base_lds; |
161 | |
162 | READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); |
163 | cpu->uncached_base = uncached_space.start << 24; |
164 | diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c |
165 | index ee6ef2f..7e95e1a 100644 |
166 | --- a/arch/arc/kernel/signal.c |
167 | +++ b/arch/arc/kernel/signal.c |
168 | @@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn) |
169 | { |
170 | struct rt_sigframe __user *sf; |
171 | unsigned int magic; |
172 | - int err; |
173 | struct pt_regs *regs = current_pt_regs(); |
174 | |
175 | /* Always make any pending restarted system calls return -EINTR */ |
176 | @@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn) |
177 | if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) |
178 | goto badframe; |
179 | |
180 | - err = restore_usr_regs(regs, sf); |
181 | - err |= __get_user(magic, &sf->sigret_magic); |
182 | - if (err) |
183 | + if (__get_user(magic, &sf->sigret_magic)) |
184 | goto badframe; |
185 | |
186 | if (unlikely(is_do_ss_needed(magic))) |
187 | if (restore_altstack(&sf->uc.uc_stack)) |
188 | goto badframe; |
189 | |
190 | + if (restore_usr_regs(regs, sf)) |
191 | + goto badframe; |
192 | + |
193 | /* Don't restart from sigreturn */ |
194 | syscall_wont_restart(regs); |
195 | |
196 | @@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, |
197 | return 1; |
198 | |
199 | /* |
200 | + * w/o SA_SIGINFO, struct ucontext is partially populated (only |
201 | + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation |
202 | + * during signal handler execution. This works for SA_SIGINFO as well |
203 | + * although the semantics are now overloaded (the same reg state can be |
204 | + * inspected by userland: but are they allowed to fiddle with it ? |
205 | + */ |
206 | + err |= stash_usr_regs(sf, regs, set); |
207 | + |
208 | + /* |
209 | * SA_SIGINFO requires 3 args to signal handler: |
210 | * #1: sig-no (common to any handler) |
211 | * #2: struct siginfo |
212 | @@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, |
213 | magic = MAGIC_SIGALTSTK; |
214 | } |
215 | |
216 | - /* |
217 | - * w/o SA_SIGINFO, struct ucontext is partially populated (only |
218 | - * uc_mcontext/uc_sigmask) for kernel's normal user state preservation |
219 | - * during signal handler execution. This works for SA_SIGINFO as well |
220 | - * although the semantics are now overloaded (the same reg state can be |
221 | - * inspected by userland: but are they allowed to fiddle with it ? |
222 | - */ |
223 | - err |= stash_usr_regs(sf, regs, set); |
224 | err |= __put_user(magic, &sf->sigret_magic); |
225 | if (err) |
226 | return err; |
227 | diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c |
228 | index 4cd8163..116d3e0 100644 |
229 | --- a/arch/arc/kernel/unaligned.c |
230 | +++ b/arch/arc/kernel/unaligned.c |
231 | @@ -233,6 +233,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, |
232 | regs->status32 &= ~STATUS_DE_MASK; |
233 | } else { |
234 | regs->ret += state.instr_len; |
235 | + |
236 | + /* handle zero-overhead-loop */ |
237 | + if ((regs->ret == regs->lp_end) && (regs->lp_count)) { |
238 | + regs->ret = regs->lp_start; |
239 | + regs->lp_count--; |
240 | + } |
241 | } |
242 | |
243 | return 0; |
244 | diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h |
245 | index bfc198c..863c892 100644 |
246 | --- a/arch/arm/include/asm/jump_label.h |
247 | +++ b/arch/arm/include/asm/jump_label.h |
248 | @@ -16,7 +16,7 @@ |
249 | |
250 | static __always_inline bool arch_static_branch(struct static_key *key) |
251 | { |
252 | - asm goto("1:\n\t" |
253 | + asm_volatile_goto("1:\n\t" |
254 | JUMP_LABEL_NOP "\n\t" |
255 | ".pushsection __jump_table, \"aw\"\n\t" |
256 | ".word 1b, %l[l_yes], %c0\n\t" |
257 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c |
258 | index 5bc2615..ab1fe3b 100644 |
259 | --- a/arch/arm/kernel/process.c |
260 | +++ b/arch/arm/kernel/process.c |
261 | @@ -466,7 +466,7 @@ int in_gate_area_no_mm(unsigned long addr) |
262 | { |
263 | return in_gate_area(NULL, addr); |
264 | } |
265 | -#define is_gate_vma(vma) ((vma) = &gate_vma) |
266 | +#define is_gate_vma(vma) ((vma) == &gate_vma) |
267 | #else |
268 | #define is_gate_vma(vma) 0 |
269 | #endif |
270 | diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h |
271 | index 4d6d77e..e194f95 100644 |
272 | --- a/arch/mips/include/asm/jump_label.h |
273 | +++ b/arch/mips/include/asm/jump_label.h |
274 | @@ -22,7 +22,7 @@ |
275 | |
276 | static __always_inline bool arch_static_branch(struct static_key *key) |
277 | { |
278 | - asm goto("1:\tnop\n\t" |
279 | + asm_volatile_goto("1:\tnop\n\t" |
280 | "nop\n\t" |
281 | ".pushsection __jump_table, \"aw\"\n\t" |
282 | WORD_INSN " 1b, %l[l_yes], %0\n\t" |
283 | diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c |
284 | index 04e47c6..b3f87a3 100644 |
285 | --- a/arch/parisc/kernel/traps.c |
286 | +++ b/arch/parisc/kernel/traps.c |
287 | @@ -805,14 +805,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs) |
288 | else { |
289 | |
290 | /* |
291 | - * The kernel should never fault on its own address space. |
292 | + * The kernel should never fault on its own address space, |
293 | + * unless pagefault_disable() was called before. |
294 | */ |
295 | |
296 | - if (fault_space == 0) |
297 | + if (fault_space == 0 && !in_atomic()) |
298 | { |
299 | pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); |
300 | parisc_terminate("Kernel Fault", regs, code, fault_address); |
301 | - |
302 | } |
303 | } |
304 | |
305 | diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h |
306 | index ae098c4..f016bb6 100644 |
307 | --- a/arch/powerpc/include/asm/jump_label.h |
308 | +++ b/arch/powerpc/include/asm/jump_label.h |
309 | @@ -19,7 +19,7 @@ |
310 | |
311 | static __always_inline bool arch_static_branch(struct static_key *key) |
312 | { |
313 | - asm goto("1:\n\t" |
314 | + asm_volatile_goto("1:\n\t" |
315 | "nop\n\t" |
316 | ".pushsection __jump_table, \"aw\"\n\t" |
317 | JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" |
318 | diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S |
319 | index b02f91e..7bcd4d6 100644 |
320 | --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S |
321 | +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S |
322 | @@ -1054,7 +1054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
323 | BEGIN_FTR_SECTION |
324 | mfspr r8, SPRN_DSCR |
325 | ld r7, HSTATE_DSCR(r13) |
326 | - std r8, VCPU_DSCR(r7) |
327 | + std r8, VCPU_DSCR(r9) |
328 | mtspr SPRN_DSCR, r7 |
329 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
330 | |
331 | diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h |
332 | index 6c32190..346b1c8 100644 |
333 | --- a/arch/s390/include/asm/jump_label.h |
334 | +++ b/arch/s390/include/asm/jump_label.h |
335 | @@ -15,7 +15,7 @@ |
336 | |
337 | static __always_inline bool arch_static_branch(struct static_key *key) |
338 | { |
339 | - asm goto("0: brcl 0,0\n" |
340 | + asm_volatile_goto("0: brcl 0,0\n" |
341 | ".pushsection __jump_table, \"aw\"\n" |
342 | ASM_ALIGN "\n" |
343 | ASM_PTR " 0b, %l[label], %0\n" |
344 | diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h |
345 | index 5080d16..ec2e2e2 100644 |
346 | --- a/arch/sparc/include/asm/jump_label.h |
347 | +++ b/arch/sparc/include/asm/jump_label.h |
348 | @@ -9,7 +9,7 @@ |
349 | |
350 | static __always_inline bool arch_static_branch(struct static_key *key) |
351 | { |
352 | - asm goto("1:\n\t" |
353 | + asm_volatile_goto("1:\n\t" |
354 | "nop\n\t" |
355 | "nop\n\t" |
356 | ".pushsection __jump_table, \"aw\"\n\t" |
357 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h |
358 | index e99ac27..4af181d 100644 |
359 | --- a/arch/x86/include/asm/cpufeature.h |
360 | +++ b/arch/x86/include/asm/cpufeature.h |
361 | @@ -365,7 +365,7 @@ extern const char * const x86_power_flags[32]; |
362 | static __always_inline __pure bool __static_cpu_has(u16 bit) |
363 | { |
364 | #if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 |
365 | - asm goto("1: jmp %l[t_no]\n" |
366 | + asm_volatile_goto("1: jmp %l[t_no]\n" |
367 | "2:\n" |
368 | ".section .altinstructions,\"a\"\n" |
369 | " .long 1b - .\n" |
370 | diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h |
371 | index cccd07f..779c2ef 100644 |
372 | --- a/arch/x86/include/asm/e820.h |
373 | +++ b/arch/x86/include/asm/e820.h |
374 | @@ -29,7 +29,7 @@ extern void e820_setup_gap(void); |
375 | extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, |
376 | unsigned long start_addr, unsigned long long end_addr); |
377 | struct setup_data; |
378 | -extern void parse_e820_ext(struct setup_data *data); |
379 | +extern void parse_e820_ext(u64 phys_addr, u32 data_len); |
380 | |
381 | #if defined(CONFIG_X86_64) || \ |
382 | (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) |
383 | diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h |
384 | index 3a16c14..0297669 100644 |
385 | --- a/arch/x86/include/asm/jump_label.h |
386 | +++ b/arch/x86/include/asm/jump_label.h |
387 | @@ -13,7 +13,7 @@ |
388 | |
389 | static __always_inline bool arch_static_branch(struct static_key *key) |
390 | { |
391 | - asm goto("1:" |
392 | + asm_volatile_goto("1:" |
393 | STATIC_KEY_INITIAL_NOP |
394 | ".pushsection __jump_table, \"aw\" \n\t" |
395 | _ASM_ALIGN "\n\t" |
396 | diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c |
397 | index d32abea..174da5f 100644 |
398 | --- a/arch/x86/kernel/e820.c |
399 | +++ b/arch/x86/kernel/e820.c |
400 | @@ -658,15 +658,18 @@ __init void e820_setup_gap(void) |
401 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of |
402 | * linked list of struct setup_data, which is parsed here. |
403 | */ |
404 | -void __init parse_e820_ext(struct setup_data *sdata) |
405 | +void __init parse_e820_ext(u64 phys_addr, u32 data_len) |
406 | { |
407 | int entries; |
408 | struct e820entry *extmap; |
409 | + struct setup_data *sdata; |
410 | |
411 | + sdata = early_memremap(phys_addr, data_len); |
412 | entries = sdata->len / sizeof(struct e820entry); |
413 | extmap = (struct e820entry *)(sdata->data); |
414 | __append_e820_map(extmap, entries); |
415 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
416 | + early_iounmap(sdata, data_len); |
417 | printk(KERN_INFO "e820: extended physical RAM map:\n"); |
418 | e820_print_map("extended"); |
419 | } |
420 | diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c |
421 | index 56f7fcf..91964c6 100644 |
422 | --- a/arch/x86/kernel/setup.c |
423 | +++ b/arch/x86/kernel/setup.c |
424 | @@ -426,25 +426,23 @@ static void __init reserve_initrd(void) |
425 | static void __init parse_setup_data(void) |
426 | { |
427 | struct setup_data *data; |
428 | - u64 pa_data; |
429 | + u64 pa_data, pa_next; |
430 | |
431 | pa_data = boot_params.hdr.setup_data; |
432 | while (pa_data) { |
433 | - u32 data_len, map_len; |
434 | + u32 data_len, map_len, data_type; |
435 | |
436 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), |
437 | (u64)sizeof(struct setup_data)); |
438 | data = early_memremap(pa_data, map_len); |
439 | data_len = data->len + sizeof(struct setup_data); |
440 | - if (data_len > map_len) { |
441 | - early_iounmap(data, map_len); |
442 | - data = early_memremap(pa_data, data_len); |
443 | - map_len = data_len; |
444 | - } |
445 | + data_type = data->type; |
446 | + pa_next = data->next; |
447 | + early_iounmap(data, map_len); |
448 | |
449 | - switch (data->type) { |
450 | + switch (data_type) { |
451 | case SETUP_E820_EXT: |
452 | - parse_e820_ext(data); |
453 | + parse_e820_ext(pa_data, data_len); |
454 | break; |
455 | case SETUP_DTB: |
456 | add_dtb(pa_data); |
457 | @@ -452,8 +450,7 @@ static void __init parse_setup_data(void) |
458 | default: |
459 | break; |
460 | } |
461 | - pa_data = data->next; |
462 | - early_iounmap(data, map_len); |
463 | + pa_data = pa_next; |
464 | } |
465 | } |
466 | |
467 | diff --git a/drivers/char/random.c b/drivers/char/random.c |
468 | index 35487e8..81eefa1 100644 |
469 | --- a/drivers/char/random.c |
470 | +++ b/drivers/char/random.c |
471 | @@ -1462,12 +1462,11 @@ ctl_table random_table[] = { |
472 | |
473 | static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; |
474 | |
475 | -static int __init random_int_secret_init(void) |
476 | +int random_int_secret_init(void) |
477 | { |
478 | get_random_bytes(random_int_secret, sizeof(random_int_secret)); |
479 | return 0; |
480 | } |
481 | -late_initcall(random_int_secret_init); |
482 | |
483 | /* |
484 | * Get a random word for internal kernel use only. Similar to urandom but |
485 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c |
486 | index 2667d6d..ab95259 100644 |
487 | --- a/drivers/gpu/drm/i915/intel_display.c |
488 | +++ b/drivers/gpu/drm/i915/intel_display.c |
489 | @@ -3946,8 +3946,6 @@ static void intel_connector_check_state(struct intel_connector *connector) |
490 | * consider. */ |
491 | void intel_connector_dpms(struct drm_connector *connector, int mode) |
492 | { |
493 | - struct intel_encoder *encoder = intel_attached_encoder(connector); |
494 | - |
495 | /* All the simple cases only support two dpms states. */ |
496 | if (mode != DRM_MODE_DPMS_ON) |
497 | mode = DRM_MODE_DPMS_OFF; |
498 | @@ -3958,10 +3956,8 @@ void intel_connector_dpms(struct drm_connector *connector, int mode) |
499 | connector->dpms = mode; |
500 | |
501 | /* Only need to change hw state when actually enabled */ |
502 | - if (encoder->base.crtc) |
503 | - intel_encoder_dpms(encoder, mode); |
504 | - else |
505 | - WARN_ON(encoder->connectors_active != false); |
506 | + if (connector->encoder) |
507 | + intel_encoder_dpms(to_intel_encoder(connector->encoder), mode); |
508 | |
509 | intel_modeset_check_state(connector->dev); |
510 | } |
511 | diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c |
512 | index 2068df1..8b6b0ba 100644 |
513 | --- a/drivers/gpu/drm/radeon/evergreen.c |
514 | +++ b/drivers/gpu/drm/radeon/evergreen.c |
515 | @@ -2990,7 +2990,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) |
516 | rdev->config.evergreen.sx_max_export_size = 256; |
517 | rdev->config.evergreen.sx_max_export_pos_size = 64; |
518 | rdev->config.evergreen.sx_max_export_smx_size = 192; |
519 | - rdev->config.evergreen.max_hw_contexts = 8; |
520 | + rdev->config.evergreen.max_hw_contexts = 4; |
521 | rdev->config.evergreen.sq_num_cf_insts = 2; |
522 | |
523 | rdev->config.evergreen.sc_prim_fifo_size = 0x40; |
524 | diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h |
525 | index 9490972..150e318 100644 |
526 | --- a/drivers/gpu/drm/radeon/evergreend.h |
527 | +++ b/drivers/gpu/drm/radeon/evergreend.h |
528 | @@ -1104,7 +1104,7 @@ |
529 | * 6. COMMAND [29:22] | BYTE_COUNT [20:0] |
530 | */ |
531 | # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) |
532 | - /* 0 - SRC_ADDR |
533 | + /* 0 - DST_ADDR |
534 | * 1 - GDS |
535 | */ |
536 | # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) |
537 | @@ -1119,7 +1119,7 @@ |
538 | # define PACKET3_CP_DMA_CP_SYNC (1 << 31) |
539 | /* COMMAND */ |
540 | # define PACKET3_CP_DMA_DIS_WC (1 << 21) |
541 | -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) |
542 | +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) |
543 | /* 0 - none |
544 | * 1 - 8 in 16 |
545 | * 2 - 8 in 32 |
546 | diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h |
547 | index 79df558..2fd2241 100644 |
548 | --- a/drivers/gpu/drm/radeon/r600d.h |
549 | +++ b/drivers/gpu/drm/radeon/r600d.h |
550 | @@ -1259,7 +1259,7 @@ |
551 | */ |
552 | # define PACKET3_CP_DMA_CP_SYNC (1 << 31) |
553 | /* COMMAND */ |
554 | -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) |
555 | +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) |
556 | /* 0 - none |
557 | * 1 - 8 in 16 |
558 | * 2 - 8 in 32 |
559 | diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c |
560 | index bbed4af..f9ebf2b 100644 |
561 | --- a/drivers/gpu/drm/radeon/radeon_test.c |
562 | +++ b/drivers/gpu/drm/radeon/radeon_test.c |
563 | @@ -37,8 +37,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) |
564 | struct radeon_bo **gtt_obj = NULL; |
565 | struct radeon_fence *fence = NULL; |
566 | uint64_t gtt_addr, vram_addr; |
567 | - unsigned i, n, size; |
568 | - int r, ring; |
569 | + unsigned n, size; |
570 | + int i, r, ring; |
571 | |
572 | switch (flag) { |
573 | case RADEON_TEST_COPY_DMA: |
574 | diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h |
575 | index 8c68e67..495f41f 100644 |
576 | --- a/drivers/gpu/drm/radeon/sid.h |
577 | +++ b/drivers/gpu/drm/radeon/sid.h |
578 | @@ -928,7 +928,7 @@ |
579 | * 6. COMMAND [30:21] | BYTE_COUNT [20:0] |
580 | */ |
581 | # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) |
582 | - /* 0 - SRC_ADDR |
583 | + /* 0 - DST_ADDR |
584 | * 1 - GDS |
585 | */ |
586 | # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) |
587 | @@ -943,7 +943,7 @@ |
588 | # define PACKET3_CP_DMA_CP_SYNC (1 << 31) |
589 | /* COMMAND */ |
590 | # define PACKET3_CP_DMA_DIS_WC (1 << 21) |
591 | -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) |
592 | +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) |
593 | /* 0 - none |
594 | * 1 - 8 in 16 |
595 | * 2 - 8 in 32 |
596 | diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c |
597 | index 98814d1..3288f13 100644 |
598 | --- a/drivers/hwmon/applesmc.c |
599 | +++ b/drivers/hwmon/applesmc.c |
600 | @@ -230,6 +230,7 @@ static int send_argument(const char *key) |
601 | |
602 | static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) |
603 | { |
604 | + u8 status, data = 0; |
605 | int i; |
606 | |
607 | if (send_command(cmd) || send_argument(key)) { |
608 | @@ -237,6 +238,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) |
609 | return -EIO; |
610 | } |
611 | |
612 | + /* This has no effect on newer (2012) SMCs */ |
613 | if (send_byte(len, APPLESMC_DATA_PORT)) { |
614 | pr_warn("%.4s: read len fail\n", key); |
615 | return -EIO; |
616 | @@ -250,6 +252,17 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) |
617 | buffer[i] = inb(APPLESMC_DATA_PORT); |
618 | } |
619 | |
620 | + /* Read the data port until bit0 is cleared */ |
621 | + for (i = 0; i < 16; i++) { |
622 | + udelay(APPLESMC_MIN_WAIT); |
623 | + status = inb(APPLESMC_CMD_PORT); |
624 | + if (!(status & 0x01)) |
625 | + break; |
626 | + data = inb(APPLESMC_DATA_PORT); |
627 | + } |
628 | + if (i) |
629 | + pr_warn("flushed %d bytes, last value is: %d\n", i, data); |
630 | + |
631 | return 0; |
632 | } |
633 | |
634 | diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c |
635 | index e02f9e3..b06be8e 100644 |
636 | --- a/drivers/i2c/busses/i2c-omap.c |
637 | +++ b/drivers/i2c/busses/i2c-omap.c |
638 | @@ -941,6 +941,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) |
639 | /* |
640 | * ProDB0017052: Clear ARDY bit twice |
641 | */ |
642 | + if (stat & OMAP_I2C_STAT_ARDY) |
643 | + omap_i2c_ack_stat(dev, OMAP_I2C_STAT_ARDY); |
644 | + |
645 | if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | |
646 | OMAP_I2C_STAT_AL)) { |
647 | omap_i2c_ack_stat(dev, (OMAP_I2C_STAT_RRDY | |
648 | diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c |
649 | index b8a9245..9ad2bd3 100644 |
650 | --- a/drivers/watchdog/ts72xx_wdt.c |
651 | +++ b/drivers/watchdog/ts72xx_wdt.c |
652 | @@ -310,7 +310,8 @@ static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd, |
653 | |
654 | case WDIOC_GETSTATUS: |
655 | case WDIOC_GETBOOTSTATUS: |
656 | - return put_user(0, p); |
657 | + error = put_user(0, p); |
658 | + break; |
659 | |
660 | case WDIOC_KEEPALIVE: |
661 | ts72xx_wdt_kick(wdt); |
662 | diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c |
663 | index 17f3064..1e2288d 100644 |
664 | --- a/fs/btrfs/inode.c |
665 | +++ b/fs/btrfs/inode.c |
666 | @@ -8146,7 +8146,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
667 | |
668 | |
669 | /* check for collisions, even if the name isn't there */ |
670 | - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, |
671 | + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, |
672 | new_dentry->d_name.name, |
673 | new_dentry->d_name.len); |
674 | |
675 | diff --git a/fs/dcache.c b/fs/dcache.c |
676 | index f09b908..da89cdf 100644 |
677 | --- a/fs/dcache.c |
678 | +++ b/fs/dcache.c |
679 | @@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, |
680 | return memcpy(buffer, temp, sz); |
681 | } |
682 | |
683 | +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) |
684 | +{ |
685 | + char *end = buffer + buflen; |
686 | + /* these dentries are never renamed, so d_lock is not needed */ |
687 | + if (prepend(&end, &buflen, " (deleted)", 11) || |
688 | + prepend_name(&end, &buflen, &dentry->d_name) || |
689 | + prepend(&end, &buflen, "/", 1)) |
690 | + end = ERR_PTR(-ENAMETOOLONG); |
691 | + return end; |
692 | +} |
693 | + |
694 | /* |
695 | * Write full pathname from the root of the filesystem into the buffer. |
696 | */ |
697 | diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c |
698 | index c081e34..03e9beb 100644 |
699 | --- a/fs/ext4/xattr.c |
700 | +++ b/fs/ext4/xattr.c |
701 | @@ -1350,6 +1350,8 @@ retry: |
702 | s_min_extra_isize) { |
703 | tried_min_extra_isize++; |
704 | new_extra_isize = s_min_extra_isize; |
705 | + kfree(is); is = NULL; |
706 | + kfree(bs); bs = NULL; |
707 | goto retry; |
708 | } |
709 | error = -1; |
710 | diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c |
711 | index a3f868a..4e5f332 100644 |
712 | --- a/fs/hugetlbfs/inode.c |
713 | +++ b/fs/hugetlbfs/inode.c |
714 | @@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log) |
715 | return h - hstates; |
716 | } |
717 | |
718 | -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen) |
719 | -{ |
720 | - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", |
721 | - dentry->d_name.name); |
722 | -} |
723 | - |
724 | static struct dentry_operations anon_ops = { |
725 | - .d_dname = hugetlb_dname |
726 | + .d_dname = simple_dname |
727 | }; |
728 | |
729 | /* |
730 | diff --git a/fs/statfs.c b/fs/statfs.c |
731 | index c219e733..083dc0a 100644 |
732 | --- a/fs/statfs.c |
733 | +++ b/fs/statfs.c |
734 | @@ -94,7 +94,7 @@ retry: |
735 | |
736 | int fd_statfs(int fd, struct kstatfs *st) |
737 | { |
738 | - struct fd f = fdget(fd); |
739 | + struct fd f = fdget_raw(fd); |
740 | int error = -EBADF; |
741 | if (f.file) { |
742 | error = vfs_statfs(&f.file->f_path, st); |
743 | diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h |
744 | index 842de22..ded4299 100644 |
745 | --- a/include/linux/compiler-gcc4.h |
746 | +++ b/include/linux/compiler-gcc4.h |
747 | @@ -65,6 +65,21 @@ |
748 | #define __visible __attribute__((externally_visible)) |
749 | #endif |
750 | |
751 | +/* |
752 | + * GCC 'asm goto' miscompiles certain code sequences: |
753 | + * |
754 | + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 |
755 | + * |
756 | + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. |
757 | + * Fixed in GCC 4.8.2 and later versions. |
758 | + * |
759 | + * (asm goto is automatically volatile - the naming reflects this.) |
760 | + */ |
761 | +#if GCC_VERSION <= 40801 |
762 | +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) |
763 | +#else |
764 | +# define asm_volatile_goto(x...) do { asm goto(x); } while (0) |
765 | +#endif |
766 | |
767 | #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP |
768 | #if GCC_VERSION >= 40400 |
769 | diff --git a/include/linux/dcache.h b/include/linux/dcache.h |
770 | index 1a6bb81..9be5ac9 100644 |
771 | --- a/include/linux/dcache.h |
772 | +++ b/include/linux/dcache.h |
773 | @@ -332,6 +332,7 @@ extern int d_validate(struct dentry *, struct dentry *); |
774 | * helper function for dentry_operations.d_dname() members |
775 | */ |
776 | extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); |
777 | +extern char *simple_dname(struct dentry *, char *, int); |
778 | |
779 | extern char *__d_path(const struct path *, const struct path *, char *, int); |
780 | extern char *d_absolute_path(const struct path *, char *, int); |
781 | diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h |
782 | index c4d870b..19c19a5 100644 |
783 | --- a/include/linux/ipc_namespace.h |
784 | +++ b/include/linux/ipc_namespace.h |
785 | @@ -22,7 +22,7 @@ struct ipc_ids { |
786 | int in_use; |
787 | unsigned short seq; |
788 | unsigned short seq_max; |
789 | - struct rw_semaphore rw_mutex; |
790 | + struct rw_semaphore rwsem; |
791 | struct idr ipcs_idr; |
792 | int next_id; |
793 | }; |
794 | diff --git a/include/linux/random.h b/include/linux/random.h |
795 | index 3b9377d..6312dd9 100644 |
796 | --- a/include/linux/random.h |
797 | +++ b/include/linux/random.h |
798 | @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags); |
799 | extern void get_random_bytes(void *buf, int nbytes); |
800 | extern void get_random_bytes_arch(void *buf, int nbytes); |
801 | void generate_random_uuid(unsigned char uuid_out[16]); |
802 | +extern int random_int_secret_init(void); |
803 | |
804 | #ifndef MODULE |
805 | extern const struct file_operations random_fops, urandom_fops; |
806 | diff --git a/include/linux/sem.h b/include/linux/sem.h |
807 | index 53d4265..976ce3a 100644 |
808 | --- a/include/linux/sem.h |
809 | +++ b/include/linux/sem.h |
810 | @@ -12,10 +12,12 @@ struct task_struct; |
811 | struct sem_array { |
812 | struct kern_ipc_perm ____cacheline_aligned_in_smp |
813 | sem_perm; /* permissions .. see ipc.h */ |
814 | - time_t sem_otime; /* last semop time */ |
815 | time_t sem_ctime; /* last change time */ |
816 | struct sem *sem_base; /* ptr to first semaphore in array */ |
817 | - struct list_head sem_pending; /* pending operations to be processed */ |
818 | + struct list_head pending_alter; /* pending operations */ |
819 | + /* that alter the array */ |
820 | + struct list_head pending_const; /* pending complex operations */ |
821 | + /* that do not alter semvals */ |
822 | struct list_head list_id; /* undo requests on this array */ |
823 | int sem_nsems; /* no. of semaphores in array */ |
824 | int complex_count; /* pending complex operations */ |
825 | diff --git a/init/main.c b/init/main.c |
826 | index 9484f4b..e83ac04 100644 |
827 | --- a/init/main.c |
828 | +++ b/init/main.c |
829 | @@ -74,6 +74,7 @@ |
830 | #include <linux/ptrace.h> |
831 | #include <linux/blkdev.h> |
832 | #include <linux/elevator.h> |
833 | +#include <linux/random.h> |
834 | |
835 | #include <asm/io.h> |
836 | #include <asm/bugs.h> |
837 | @@ -777,6 +778,7 @@ static void __init do_basic_setup(void) |
838 | do_ctors(); |
839 | usermodehelper_enable(); |
840 | do_initcalls(); |
841 | + random_int_secret_init(); |
842 | } |
843 | |
844 | static void __init do_pre_smp_initcalls(void) |
845 | diff --git a/ipc/msg.c b/ipc/msg.c |
846 | index f8fbe2c..558aa91 100644 |
847 | --- a/ipc/msg.c |
848 | +++ b/ipc/msg.c |
849 | @@ -70,8 +70,6 @@ struct msg_sender { |
850 | |
851 | #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) |
852 | |
853 | -#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) |
854 | - |
855 | static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); |
856 | static int newque(struct ipc_namespace *, struct ipc_params *); |
857 | #ifdef CONFIG_PROC_FS |
858 | @@ -141,27 +139,23 @@ void __init msg_init(void) |
859 | IPC_MSG_IDS, sysvipc_msg_proc_show); |
860 | } |
861 | |
862 | -/* |
863 | - * msg_lock_(check_) routines are called in the paths where the rw_mutex |
864 | - * is not held. |
865 | - */ |
866 | -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) |
867 | +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) |
868 | { |
869 | - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); |
870 | + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); |
871 | |
872 | if (IS_ERR(ipcp)) |
873 | - return (struct msg_queue *)ipcp; |
874 | + return ERR_CAST(ipcp); |
875 | |
876 | return container_of(ipcp, struct msg_queue, q_perm); |
877 | } |
878 | |
879 | -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, |
880 | - int id) |
881 | +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, |
882 | + int id) |
883 | { |
884 | - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); |
885 | + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); |
886 | |
887 | if (IS_ERR(ipcp)) |
888 | - return (struct msg_queue *)ipcp; |
889 | + return ERR_CAST(ipcp); |
890 | |
891 | return container_of(ipcp, struct msg_queue, q_perm); |
892 | } |
893 | @@ -171,12 +165,21 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) |
894 | ipc_rmid(&msg_ids(ns), &s->q_perm); |
895 | } |
896 | |
897 | +static void msg_rcu_free(struct rcu_head *head) |
898 | +{ |
899 | + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); |
900 | + struct msg_queue *msq = ipc_rcu_to_struct(p); |
901 | + |
902 | + security_msg_queue_free(msq); |
903 | + ipc_rcu_free(head); |
904 | +} |
905 | + |
906 | /** |
907 | * newque - Create a new msg queue |
908 | * @ns: namespace |
909 | * @params: ptr to the structure that contains the key and msgflg |
910 | * |
911 | - * Called with msg_ids.rw_mutex held (writer) |
912 | + * Called with msg_ids.rwsem held (writer) |
913 | */ |
914 | static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
915 | { |
916 | @@ -195,17 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
917 | msq->q_perm.security = NULL; |
918 | retval = security_msg_queue_alloc(msq); |
919 | if (retval) { |
920 | - ipc_rcu_putref(msq); |
921 | + ipc_rcu_putref(msq, ipc_rcu_free); |
922 | return retval; |
923 | } |
924 | |
925 | - /* |
926 | - * ipc_addid() locks msq |
927 | - */ |
928 | + /* ipc_addid() locks msq upon success. */ |
929 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); |
930 | if (id < 0) { |
931 | - security_msg_queue_free(msq); |
932 | - ipc_rcu_putref(msq); |
933 | + ipc_rcu_putref(msq, msg_rcu_free); |
934 | return id; |
935 | } |
936 | |
937 | @@ -218,7 +218,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) |
938 | INIT_LIST_HEAD(&msq->q_receivers); |
939 | INIT_LIST_HEAD(&msq->q_senders); |
940 | |
941 | - msg_unlock(msq); |
942 | + ipc_unlock_object(&msq->q_perm); |
943 | + rcu_read_unlock(); |
944 | |
945 | return msq->q_perm.id; |
946 | } |
947 | @@ -264,8 +265,8 @@ static void expunge_all(struct msg_queue *msq, int res) |
948 | * removes the message queue from message queue ID IDR, and cleans up all the |
949 | * messages associated with this queue. |
950 | * |
951 | - * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held |
952 | - * before freeque() is called. msg_ids.rw_mutex remains locked on exit. |
953 | + * msg_ids.rwsem (writer) and the spinlock for this message queue are held |
954 | + * before freeque() is called. msg_ids.rwsem remains locked on exit. |
955 | */ |
956 | static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
957 | { |
958 | @@ -275,19 +276,19 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
959 | expunge_all(msq, -EIDRM); |
960 | ss_wakeup(&msq->q_senders, 1); |
961 | msg_rmid(ns, msq); |
962 | - msg_unlock(msq); |
963 | + ipc_unlock_object(&msq->q_perm); |
964 | + rcu_read_unlock(); |
965 | |
966 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { |
967 | atomic_dec(&ns->msg_hdrs); |
968 | free_msg(msg); |
969 | } |
970 | atomic_sub(msq->q_cbytes, &ns->msg_bytes); |
971 | - security_msg_queue_free(msq); |
972 | - ipc_rcu_putref(msq); |
973 | + ipc_rcu_putref(msq, msg_rcu_free); |
974 | } |
975 | |
976 | /* |
977 | - * Called with msg_ids.rw_mutex and ipcp locked. |
978 | + * Called with msg_ids.rwsem and ipcp locked. |
979 | */ |
980 | static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) |
981 | { |
982 | @@ -391,9 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) |
983 | } |
984 | |
985 | /* |
986 | - * This function handles some msgctl commands which require the rw_mutex |
987 | + * This function handles some msgctl commands which require the rwsem |
988 | * to be held in write mode. |
989 | - * NOTE: no locks must be held, the rw_mutex is taken inside this function. |
990 | + * NOTE: no locks must be held, the rwsem is taken inside this function. |
991 | */ |
992 | static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, |
993 | struct msqid_ds __user *buf, int version) |
994 | @@ -408,31 +409,39 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, |
995 | return -EFAULT; |
996 | } |
997 | |
998 | - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, |
999 | - &msqid64.msg_perm, msqid64.msg_qbytes); |
1000 | - if (IS_ERR(ipcp)) |
1001 | - return PTR_ERR(ipcp); |
1002 | + down_write(&msg_ids(ns).rwsem); |
1003 | + rcu_read_lock(); |
1004 | + |
1005 | + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, |
1006 | + &msqid64.msg_perm, msqid64.msg_qbytes); |
1007 | + if (IS_ERR(ipcp)) { |
1008 | + err = PTR_ERR(ipcp); |
1009 | + goto out_unlock1; |
1010 | + } |
1011 | |
1012 | msq = container_of(ipcp, struct msg_queue, q_perm); |
1013 | |
1014 | err = security_msg_queue_msgctl(msq, cmd); |
1015 | if (err) |
1016 | - goto out_unlock; |
1017 | + goto out_unlock1; |
1018 | |
1019 | switch (cmd) { |
1020 | case IPC_RMID: |
1021 | + ipc_lock_object(&msq->q_perm); |
1022 | + /* freeque unlocks the ipc object and rcu */ |
1023 | freeque(ns, ipcp); |
1024 | goto out_up; |
1025 | case IPC_SET: |
1026 | if (msqid64.msg_qbytes > ns->msg_ctlmnb && |
1027 | !capable(CAP_SYS_RESOURCE)) { |
1028 | err = -EPERM; |
1029 | - goto out_unlock; |
1030 | + goto out_unlock1; |
1031 | } |
1032 | |
1033 | + ipc_lock_object(&msq->q_perm); |
1034 | err = ipc_update_perm(&msqid64.msg_perm, ipcp); |
1035 | if (err) |
1036 | - goto out_unlock; |
1037 | + goto out_unlock0; |
1038 | |
1039 | msq->q_qbytes = msqid64.msg_qbytes; |
1040 | |
1041 | @@ -448,25 +457,23 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, |
1042 | break; |
1043 | default: |
1044 | err = -EINVAL; |
1045 | + goto out_unlock1; |
1046 | } |
1047 | -out_unlock: |
1048 | - msg_unlock(msq); |
1049 | + |
1050 | +out_unlock0: |
1051 | + ipc_unlock_object(&msq->q_perm); |
1052 | +out_unlock1: |
1053 | + rcu_read_unlock(); |
1054 | out_up: |
1055 | - up_write(&msg_ids(ns).rw_mutex); |
1056 | + up_write(&msg_ids(ns).rwsem); |
1057 | return err; |
1058 | } |
1059 | |
1060 | -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1061 | +static int msgctl_nolock(struct ipc_namespace *ns, int msqid, |
1062 | + int cmd, int version, void __user *buf) |
1063 | { |
1064 | + int err; |
1065 | struct msg_queue *msq; |
1066 | - int err, version; |
1067 | - struct ipc_namespace *ns; |
1068 | - |
1069 | - if (msqid < 0 || cmd < 0) |
1070 | - return -EINVAL; |
1071 | - |
1072 | - version = ipc_parse_version(&cmd); |
1073 | - ns = current->nsproxy->ipc_ns; |
1074 | |
1075 | switch (cmd) { |
1076 | case IPC_INFO: |
1077 | @@ -477,6 +484,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1078 | |
1079 | if (!buf) |
1080 | return -EFAULT; |
1081 | + |
1082 | /* |
1083 | * We must not return kernel stack data. |
1084 | * due to padding, it's not enough |
1085 | @@ -492,7 +500,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1086 | msginfo.msgmnb = ns->msg_ctlmnb; |
1087 | msginfo.msgssz = MSGSSZ; |
1088 | msginfo.msgseg = MSGSEG; |
1089 | - down_read(&msg_ids(ns).rw_mutex); |
1090 | + down_read(&msg_ids(ns).rwsem); |
1091 | if (cmd == MSG_INFO) { |
1092 | msginfo.msgpool = msg_ids(ns).in_use; |
1093 | msginfo.msgmap = atomic_read(&ns->msg_hdrs); |
1094 | @@ -503,12 +511,13 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1095 | msginfo.msgtql = MSGTQL; |
1096 | } |
1097 | max_id = ipc_get_maxid(&msg_ids(ns)); |
1098 | - up_read(&msg_ids(ns).rw_mutex); |
1099 | + up_read(&msg_ids(ns).rwsem); |
1100 | if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) |
1101 | return -EFAULT; |
1102 | return (max_id < 0) ? 0 : max_id; |
1103 | } |
1104 | - case MSG_STAT: /* msqid is an index rather than a msg queue id */ |
1105 | + |
1106 | + case MSG_STAT: |
1107 | case IPC_STAT: |
1108 | { |
1109 | struct msqid64_ds tbuf; |
1110 | @@ -517,17 +526,25 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1111 | if (!buf) |
1112 | return -EFAULT; |
1113 | |
1114 | + memset(&tbuf, 0, sizeof(tbuf)); |
1115 | + |
1116 | + rcu_read_lock(); |
1117 | if (cmd == MSG_STAT) { |
1118 | - msq = msg_lock(ns, msqid); |
1119 | - if (IS_ERR(msq)) |
1120 | - return PTR_ERR(msq); |
1121 | + msq = msq_obtain_object(ns, msqid); |
1122 | + if (IS_ERR(msq)) { |
1123 | + err = PTR_ERR(msq); |
1124 | + goto out_unlock; |
1125 | + } |
1126 | success_return = msq->q_perm.id; |
1127 | } else { |
1128 | - msq = msg_lock_check(ns, msqid); |
1129 | - if (IS_ERR(msq)) |
1130 | - return PTR_ERR(msq); |
1131 | + msq = msq_obtain_object_check(ns, msqid); |
1132 | + if (IS_ERR(msq)) { |
1133 | + err = PTR_ERR(msq); |
1134 | + goto out_unlock; |
1135 | + } |
1136 | success_return = 0; |
1137 | } |
1138 | + |
1139 | err = -EACCES; |
1140 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) |
1141 | goto out_unlock; |
1142 | @@ -536,8 +553,6 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1143 | if (err) |
1144 | goto out_unlock; |
1145 | |
1146 | - memset(&tbuf, 0, sizeof(tbuf)); |
1147 | - |
1148 | kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); |
1149 | tbuf.msg_stime = msq->q_stime; |
1150 | tbuf.msg_rtime = msq->q_rtime; |
1151 | @@ -547,24 +562,48 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1152 | tbuf.msg_qbytes = msq->q_qbytes; |
1153 | tbuf.msg_lspid = msq->q_lspid; |
1154 | tbuf.msg_lrpid = msq->q_lrpid; |
1155 | - msg_unlock(msq); |
1156 | + rcu_read_unlock(); |
1157 | + |
1158 | if (copy_msqid_to_user(buf, &tbuf, version)) |
1159 | return -EFAULT; |
1160 | return success_return; |
1161 | } |
1162 | - case IPC_SET: |
1163 | - case IPC_RMID: |
1164 | - err = msgctl_down(ns, msqid, cmd, buf, version); |
1165 | - return err; |
1166 | + |
1167 | default: |
1168 | - return -EINVAL; |
1169 | + return -EINVAL; |
1170 | } |
1171 | |
1172 | + return err; |
1173 | out_unlock: |
1174 | - msg_unlock(msq); |
1175 | + rcu_read_unlock(); |
1176 | return err; |
1177 | } |
1178 | |
1179 | +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) |
1180 | +{ |
1181 | + int version; |
1182 | + struct ipc_namespace *ns; |
1183 | + |
1184 | + if (msqid < 0 || cmd < 0) |
1185 | + return -EINVAL; |
1186 | + |
1187 | + version = ipc_parse_version(&cmd); |
1188 | + ns = current->nsproxy->ipc_ns; |
1189 | + |
1190 | + switch (cmd) { |
1191 | + case IPC_INFO: |
1192 | + case MSG_INFO: |
1193 | + case MSG_STAT: /* msqid is an index rather than a msg queue id */ |
1194 | + case IPC_STAT: |
1195 | + return msgctl_nolock(ns, msqid, cmd, version, buf); |
1196 | + case IPC_SET: |
1197 | + case IPC_RMID: |
1198 | + return msgctl_down(ns, msqid, cmd, buf, version); |
1199 | + default: |
1200 | + return -EINVAL; |
1201 | + } |
1202 | +} |
1203 | + |
1204 | static int testmsg(struct msg_msg *msg, long type, int mode) |
1205 | { |
1206 | switch(mode) |
1207 | @@ -640,22 +679,31 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, |
1208 | msg->m_type = mtype; |
1209 | msg->m_ts = msgsz; |
1210 | |
1211 | - msq = msg_lock_check(ns, msqid); |
1212 | + rcu_read_lock(); |
1213 | + msq = msq_obtain_object_check(ns, msqid); |
1214 | if (IS_ERR(msq)) { |
1215 | err = PTR_ERR(msq); |
1216 | - goto out_free; |
1217 | + goto out_unlock1; |
1218 | } |
1219 | |
1220 | + ipc_lock_object(&msq->q_perm); |
1221 | + |
1222 | for (;;) { |
1223 | struct msg_sender s; |
1224 | |
1225 | err = -EACCES; |
1226 | if (ipcperms(ns, &msq->q_perm, S_IWUGO)) |
1227 | - goto out_unlock_free; |
1228 | + goto out_unlock0; |
1229 | + |
1230 | + /* raced with RMID? */ |
1231 | + if (msq->q_perm.deleted) { |
1232 | + err = -EIDRM; |
1233 | + goto out_unlock0; |
1234 | + } |
1235 | |
1236 | err = security_msg_queue_msgsnd(msq, msg, msgflg); |
1237 | if (err) |
1238 | - goto out_unlock_free; |
1239 | + goto out_unlock0; |
1240 | |
1241 | if (msgsz + msq->q_cbytes <= msq->q_qbytes && |
1242 | 1 + msq->q_qnum <= msq->q_qbytes) { |
1243 | @@ -665,32 +713,37 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, |
1244 | /* queue full, wait: */ |
1245 | if (msgflg & IPC_NOWAIT) { |
1246 | err = -EAGAIN; |
1247 | - goto out_unlock_free; |
1248 | + goto out_unlock0; |
1249 | } |
1250 | + |
1251 | ss_add(msq, &s); |
1252 | |
1253 | if (!ipc_rcu_getref(msq)) { |
1254 | err = -EIDRM; |
1255 | - goto out_unlock_free; |
1256 | + goto out_unlock0; |
1257 | } |
1258 | |
1259 | - msg_unlock(msq); |
1260 | + ipc_unlock_object(&msq->q_perm); |
1261 | + rcu_read_unlock(); |
1262 | schedule(); |
1263 | |
1264 | - ipc_lock_by_ptr(&msq->q_perm); |
1265 | - ipc_rcu_putref(msq); |
1266 | + rcu_read_lock(); |
1267 | + ipc_lock_object(&msq->q_perm); |
1268 | + |
1269 | + ipc_rcu_putref(msq, ipc_rcu_free); |
1270 | if (msq->q_perm.deleted) { |
1271 | err = -EIDRM; |
1272 | - goto out_unlock_free; |
1273 | + goto out_unlock0; |
1274 | } |
1275 | + |
1276 | ss_del(&s); |
1277 | |
1278 | if (signal_pending(current)) { |
1279 | err = -ERESTARTNOHAND; |
1280 | - goto out_unlock_free; |
1281 | + goto out_unlock0; |
1282 | } |
1283 | - } |
1284 | |
1285 | + } |
1286 | msq->q_lspid = task_tgid_vnr(current); |
1287 | msq->q_stime = get_seconds(); |
1288 | |
1289 | @@ -706,9 +759,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, |
1290 | err = 0; |
1291 | msg = NULL; |
1292 | |
1293 | -out_unlock_free: |
1294 | - msg_unlock(msq); |
1295 | -out_free: |
1296 | +out_unlock0: |
1297 | + ipc_unlock_object(&msq->q_perm); |
1298 | +out_unlock1: |
1299 | + rcu_read_unlock(); |
1300 | if (msg != NULL) |
1301 | free_msg(msg); |
1302 | return err; |
1303 | @@ -817,21 +871,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) |
1304 | return found ?: ERR_PTR(-EAGAIN); |
1305 | } |
1306 | |
1307 | - |
1308 | -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1309 | - int msgflg, |
1310 | +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, |
1311 | long (*msg_handler)(void __user *, struct msg_msg *, size_t)) |
1312 | { |
1313 | - struct msg_queue *msq; |
1314 | - struct msg_msg *msg; |
1315 | int mode; |
1316 | + struct msg_queue *msq; |
1317 | struct ipc_namespace *ns; |
1318 | - struct msg_msg *copy = NULL; |
1319 | + struct msg_msg *msg, *copy = NULL; |
1320 | |
1321 | ns = current->nsproxy->ipc_ns; |
1322 | |
1323 | if (msqid < 0 || (long) bufsz < 0) |
1324 | return -EINVAL; |
1325 | + |
1326 | if (msgflg & MSG_COPY) { |
1327 | copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); |
1328 | if (IS_ERR(copy)) |
1329 | @@ -839,8 +891,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1330 | } |
1331 | mode = convert_mode(&msgtyp, msgflg); |
1332 | |
1333 | - msq = msg_lock_check(ns, msqid); |
1334 | + rcu_read_lock(); |
1335 | + msq = msq_obtain_object_check(ns, msqid); |
1336 | if (IS_ERR(msq)) { |
1337 | + rcu_read_unlock(); |
1338 | free_copy(copy); |
1339 | return PTR_ERR(msq); |
1340 | } |
1341 | @@ -850,10 +904,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1342 | |
1343 | msg = ERR_PTR(-EACCES); |
1344 | if (ipcperms(ns, &msq->q_perm, S_IRUGO)) |
1345 | - goto out_unlock; |
1346 | + goto out_unlock1; |
1347 | |
1348 | - msg = find_msg(msq, &msgtyp, mode); |
1349 | + ipc_lock_object(&msq->q_perm); |
1350 | + |
1351 | + /* raced with RMID? */ |
1352 | + if (msq->q_perm.deleted) { |
1353 | + msg = ERR_PTR(-EIDRM); |
1354 | + goto out_unlock0; |
1355 | + } |
1356 | |
1357 | + msg = find_msg(msq, &msgtyp, mode); |
1358 | if (!IS_ERR(msg)) { |
1359 | /* |
1360 | * Found a suitable message. |
1361 | @@ -861,7 +922,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1362 | */ |
1363 | if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { |
1364 | msg = ERR_PTR(-E2BIG); |
1365 | - goto out_unlock; |
1366 | + goto out_unlock0; |
1367 | } |
1368 | /* |
1369 | * If we are copying, then do not unlink message and do |
1370 | @@ -869,8 +930,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1371 | */ |
1372 | if (msgflg & MSG_COPY) { |
1373 | msg = copy_msg(msg, copy); |
1374 | - goto out_unlock; |
1375 | + goto out_unlock0; |
1376 | } |
1377 | + |
1378 | list_del(&msg->m_list); |
1379 | msq->q_qnum--; |
1380 | msq->q_rtime = get_seconds(); |
1381 | @@ -879,14 +941,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1382 | atomic_sub(msg->m_ts, &ns->msg_bytes); |
1383 | atomic_dec(&ns->msg_hdrs); |
1384 | ss_wakeup(&msq->q_senders, 0); |
1385 | - msg_unlock(msq); |
1386 | - break; |
1387 | + |
1388 | + goto out_unlock0; |
1389 | } |
1390 | + |
1391 | /* No message waiting. Wait for a message */ |
1392 | if (msgflg & IPC_NOWAIT) { |
1393 | msg = ERR_PTR(-ENOMSG); |
1394 | - goto out_unlock; |
1395 | + goto out_unlock0; |
1396 | } |
1397 | + |
1398 | list_add_tail(&msr_d.r_list, &msq->q_receivers); |
1399 | msr_d.r_tsk = current; |
1400 | msr_d.r_msgtype = msgtyp; |
1401 | @@ -897,8 +961,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1402 | msr_d.r_maxsize = bufsz; |
1403 | msr_d.r_msg = ERR_PTR(-EAGAIN); |
1404 | current->state = TASK_INTERRUPTIBLE; |
1405 | - msg_unlock(msq); |
1406 | |
1407 | + ipc_unlock_object(&msq->q_perm); |
1408 | + rcu_read_unlock(); |
1409 | schedule(); |
1410 | |
1411 | /* Lockless receive, part 1: |
1412 | @@ -909,7 +974,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1413 | * Prior to destruction, expunge_all(-EIRDM) changes r_msg. |
1414 | * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. |
1415 | * rcu_read_lock() prevents preemption between reading r_msg |
1416 | - * and the spin_lock() inside ipc_lock_by_ptr(). |
1417 | + * and acquiring the q_perm.lock in ipc_lock_object(). |
1418 | */ |
1419 | rcu_read_lock(); |
1420 | |
1421 | @@ -928,32 +993,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, |
1422 | * If there is a message or an error then accept it without |
1423 | * locking. |
1424 | */ |
1425 | - if (msg != ERR_PTR(-EAGAIN)) { |
1426 | - rcu_read_unlock(); |
1427 | - break; |
1428 | - } |
1429 | + if (msg != ERR_PTR(-EAGAIN)) |
1430 | + goto out_unlock1; |
1431 | |
1432 | /* Lockless receive, part 3: |
1433 | * Acquire the queue spinlock. |
1434 | */ |
1435 | - ipc_lock_by_ptr(&msq->q_perm); |
1436 | - rcu_read_unlock(); |
1437 | + ipc_lock_object(&msq->q_perm); |
1438 | |
1439 | /* Lockless receive, part 4: |
1440 | * Repeat test after acquiring the spinlock. |
1441 | */ |
1442 | msg = (struct msg_msg*)msr_d.r_msg; |
1443 | if (msg != ERR_PTR(-EAGAIN)) |
1444 | - goto out_unlock; |
1445 | + goto out_unlock0; |
1446 | |
1447 | list_del(&msr_d.r_list); |
1448 | if (signal_pending(current)) { |
1449 | msg = ERR_PTR(-ERESTARTNOHAND); |
1450 | -out_unlock: |
1451 | - msg_unlock(msq); |
1452 | - break; |
1453 | + goto out_unlock0; |
1454 | } |
1455 | + |
1456 | + ipc_unlock_object(&msq->q_perm); |
1457 | } |
1458 | + |
1459 | +out_unlock0: |
1460 | + ipc_unlock_object(&msq->q_perm); |
1461 | +out_unlock1: |
1462 | + rcu_read_unlock(); |
1463 | if (IS_ERR(msg)) { |
1464 | free_copy(copy); |
1465 | return PTR_ERR(msg); |
1466 | diff --git a/ipc/namespace.c b/ipc/namespace.c |
1467 | index 7ee61bf..aba9a58 100644 |
1468 | --- a/ipc/namespace.c |
1469 | +++ b/ipc/namespace.c |
1470 | @@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, |
1471 | int next_id; |
1472 | int total, in_use; |
1473 | |
1474 | - down_write(&ids->rw_mutex); |
1475 | + down_write(&ids->rwsem); |
1476 | |
1477 | in_use = ids->in_use; |
1478 | |
1479 | @@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, |
1480 | perm = idr_find(&ids->ipcs_idr, next_id); |
1481 | if (perm == NULL) |
1482 | continue; |
1483 | - ipc_lock_by_ptr(perm); |
1484 | + rcu_read_lock(); |
1485 | + ipc_lock_object(perm); |
1486 | free(ns, perm); |
1487 | total++; |
1488 | } |
1489 | - up_write(&ids->rw_mutex); |
1490 | + up_write(&ids->rwsem); |
1491 | } |
1492 | |
1493 | static void free_ipc_ns(struct ipc_namespace *ns) |
1494 | diff --git a/ipc/sem.c b/ipc/sem.c |
1495 | index 70480a3..8c4f59b 100644 |
1496 | --- a/ipc/sem.c |
1497 | +++ b/ipc/sem.c |
1498 | @@ -95,8 +95,12 @@ struct sem { |
1499 | int semval; /* current value */ |
1500 | int sempid; /* pid of last operation */ |
1501 | spinlock_t lock; /* spinlock for fine-grained semtimedop */ |
1502 | - struct list_head sem_pending; /* pending single-sop operations */ |
1503 | -}; |
1504 | + struct list_head pending_alter; /* pending single-sop operations */ |
1505 | + /* that alter the semaphore */ |
1506 | + struct list_head pending_const; /* pending single-sop operations */ |
1507 | + /* that do not alter the semaphore*/ |
1508 | + time_t sem_otime; /* candidate for sem_otime */ |
1509 | +} ____cacheline_aligned_in_smp; |
1510 | |
1511 | /* One queue for each sleeping process in the system. */ |
1512 | struct sem_queue { |
1513 | @@ -150,12 +154,15 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); |
1514 | #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ |
1515 | |
1516 | /* |
1517 | - * linked list protection: |
1518 | + * Locking: |
1519 | * sem_undo.id_next, |
1520 | - * sem_array.sem_pending{,last}, |
1521 | - * sem_array.sem_undo: sem_lock() for read/write |
1522 | + * sem_array.complex_count, |
1523 | + * sem_array.pending{_alter,_cont}, |
1524 | + * sem_array.sem_undo: global sem_lock() for read/write |
1525 | * sem_undo.proc_next: only "current" is allowed to read/write that field. |
1526 | * |
1527 | + * sem_array.sem_base[i].pending_{const,alter}: |
1528 | + * global or semaphore sem_lock() for read/write |
1529 | */ |
1530 | |
1531 | #define sc_semmsl sem_ctls[0] |
1532 | @@ -189,77 +196,176 @@ void __init sem_init (void) |
1533 | IPC_SEM_IDS, sysvipc_sem_proc_show); |
1534 | } |
1535 | |
1536 | +/** |
1537 | + * unmerge_queues - unmerge queues, if possible. |
1538 | + * @sma: semaphore array |
1539 | + * |
1540 | + * The function unmerges the wait queues if complex_count is 0. |
1541 | + * It must be called prior to dropping the global semaphore array lock. |
1542 | + */ |
1543 | +static void unmerge_queues(struct sem_array *sma) |
1544 | +{ |
1545 | + struct sem_queue *q, *tq; |
1546 | + |
1547 | + /* complex operations still around? */ |
1548 | + if (sma->complex_count) |
1549 | + return; |
1550 | + /* |
1551 | + * We will switch back to simple mode. |
1552 | + * Move all pending operation back into the per-semaphore |
1553 | + * queues. |
1554 | + */ |
1555 | + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { |
1556 | + struct sem *curr; |
1557 | + curr = &sma->sem_base[q->sops[0].sem_num]; |
1558 | + |
1559 | + list_add_tail(&q->list, &curr->pending_alter); |
1560 | + } |
1561 | + INIT_LIST_HEAD(&sma->pending_alter); |
1562 | +} |
1563 | + |
1564 | +/** |
1565 | + * merge_queues - Merge single semop queues into global queue |
1566 | + * @sma: semaphore array |
1567 | + * |
1568 | + * This function merges all per-semaphore queues into the global queue. |
1569 | + * It is necessary to achieve FIFO ordering for the pending single-sop |
1570 | + * operations when a multi-semop operation must sleep. |
1571 | + * Only the alter operations must be moved, the const operations can stay. |
1572 | + */ |
1573 | +static void merge_queues(struct sem_array *sma) |
1574 | +{ |
1575 | + int i; |
1576 | + for (i = 0; i < sma->sem_nsems; i++) { |
1577 | + struct sem *sem = sma->sem_base + i; |
1578 | + |
1579 | + list_splice_init(&sem->pending_alter, &sma->pending_alter); |
1580 | + } |
1581 | +} |
1582 | + |
1583 | +static void sem_rcu_free(struct rcu_head *head) |
1584 | +{ |
1585 | + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); |
1586 | + struct sem_array *sma = ipc_rcu_to_struct(p); |
1587 | + |
1588 | + security_sem_free(sma); |
1589 | + ipc_rcu_free(head); |
1590 | +} |
1591 | + |
1592 | +/* |
1593 | + * Wait until all currently ongoing simple ops have completed. |
1594 | + * Caller must own sem_perm.lock. |
1595 | + * New simple ops cannot start, because simple ops first check |
1596 | + * that sem_perm.lock is free. |
1597 | + * that a) sem_perm.lock is free and b) complex_count is 0. |
1598 | + */ |
1599 | +static void sem_wait_array(struct sem_array *sma) |
1600 | +{ |
1601 | + int i; |
1602 | + struct sem *sem; |
1603 | + |
1604 | + if (sma->complex_count) { |
1605 | + /* The thread that increased sma->complex_count waited on |
1606 | + * all sem->lock locks. Thus we don't need to wait again. |
1607 | + */ |
1608 | + return; |
1609 | + } |
1610 | + |
1611 | + for (i = 0; i < sma->sem_nsems; i++) { |
1612 | + sem = sma->sem_base + i; |
1613 | + spin_unlock_wait(&sem->lock); |
1614 | + } |
1615 | +} |
1616 | + |
1617 | /* |
1618 | * If the request contains only one semaphore operation, and there are |
1619 | * no complex transactions pending, lock only the semaphore involved. |
1620 | * Otherwise, lock the entire semaphore array, since we either have |
1621 | * multiple semaphores in our own semops, or we need to look at |
1622 | * semaphores from other pending complex operations. |
1623 | - * |
1624 | - * Carefully guard against sma->complex_count changing between zero |
1625 | - * and non-zero while we are spinning for the lock. The value of |
1626 | - * sma->complex_count cannot change while we are holding the lock, |
1627 | - * so sem_unlock should be fine. |
1628 | - * |
1629 | - * The global lock path checks that all the local locks have been released, |
1630 | - * checking each local lock once. This means that the local lock paths |
1631 | - * cannot start their critical sections while the global lock is held. |
1632 | */ |
1633 | static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, |
1634 | int nsops) |
1635 | { |
1636 | - int locknum; |
1637 | - again: |
1638 | - if (nsops == 1 && !sma->complex_count) { |
1639 | - struct sem *sem = sma->sem_base + sops->sem_num; |
1640 | + struct sem *sem; |
1641 | |
1642 | - /* Lock just the semaphore we are interested in. */ |
1643 | - spin_lock(&sem->lock); |
1644 | + if (nsops != 1) { |
1645 | + /* Complex operation - acquire a full lock */ |
1646 | + ipc_lock_object(&sma->sem_perm); |
1647 | |
1648 | - /* |
1649 | - * If sma->complex_count was set while we were spinning, |
1650 | - * we may need to look at things we did not lock here. |
1651 | + /* And wait until all simple ops that are processed |
1652 | + * right now have dropped their locks. |
1653 | */ |
1654 | - if (unlikely(sma->complex_count)) { |
1655 | - spin_unlock(&sem->lock); |
1656 | - goto lock_array; |
1657 | - } |
1658 | + sem_wait_array(sma); |
1659 | + return -1; |
1660 | + } |
1661 | + |
1662 | + /* |
1663 | + * Only one semaphore affected - try to optimize locking. |
1664 | + * The rules are: |
1665 | + * - optimized locking is possible if no complex operation |
1666 | + * is either enqueued or processed right now. |
1667 | + * - The test for enqueued complex ops is simple: |
1668 | + * sma->complex_count != 0 |
1669 | + * - Testing for complex ops that are processed right now is |
1670 | + * a bit more difficult. Complex ops acquire the full lock |
1671 | + * and first wait that the running simple ops have completed. |
1672 | + * (see above) |
1673 | + * Thus: If we own a simple lock and the global lock is free |
1674 | + * and complex_count is now 0, then it will stay 0 and |
1675 | + * thus just locking sem->lock is sufficient. |
1676 | + */ |
1677 | + sem = sma->sem_base + sops->sem_num; |
1678 | |
1679 | + if (sma->complex_count == 0) { |
1680 | /* |
1681 | - * Another process is holding the global lock on the |
1682 | - * sem_array; we cannot enter our critical section, |
1683 | - * but have to wait for the global lock to be released. |
1684 | + * It appears that no complex operation is around. |
1685 | + * Acquire the per-semaphore lock. |
1686 | */ |
1687 | - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { |
1688 | - spin_unlock(&sem->lock); |
1689 | - spin_unlock_wait(&sma->sem_perm.lock); |
1690 | - goto again; |
1691 | + spin_lock(&sem->lock); |
1692 | + |
1693 | + /* Then check that the global lock is free */ |
1694 | + if (!spin_is_locked(&sma->sem_perm.lock)) { |
1695 | + /* spin_is_locked() is not a memory barrier */ |
1696 | + smp_mb(); |
1697 | + |
1698 | + /* Now repeat the test of complex_count: |
1699 | + * It can't change anymore until we drop sem->lock. |
1700 | + * Thus: if is now 0, then it will stay 0. |
1701 | + */ |
1702 | + if (sma->complex_count == 0) { |
1703 | + /* fast path successful! */ |
1704 | + return sops->sem_num; |
1705 | + } |
1706 | } |
1707 | + spin_unlock(&sem->lock); |
1708 | + } |
1709 | + |
1710 | + /* slow path: acquire the full lock */ |
1711 | + ipc_lock_object(&sma->sem_perm); |
1712 | |
1713 | - locknum = sops->sem_num; |
1714 | + if (sma->complex_count == 0) { |
1715 | + /* False alarm: |
1716 | + * There is no complex operation, thus we can switch |
1717 | + * back to the fast path. |
1718 | + */ |
1719 | + spin_lock(&sem->lock); |
1720 | + ipc_unlock_object(&sma->sem_perm); |
1721 | + return sops->sem_num; |
1722 | } else { |
1723 | - int i; |
1724 | - /* |
1725 | - * Lock the semaphore array, and wait for all of the |
1726 | - * individual semaphore locks to go away. The code |
1727 | - * above ensures no new single-lock holders will enter |
1728 | - * their critical section while the array lock is held. |
1729 | + /* Not a false alarm, thus complete the sequence for a |
1730 | + * full lock. |
1731 | */ |
1732 | - lock_array: |
1733 | - spin_lock(&sma->sem_perm.lock); |
1734 | - for (i = 0; i < sma->sem_nsems; i++) { |
1735 | - struct sem *sem = sma->sem_base + i; |
1736 | - spin_unlock_wait(&sem->lock); |
1737 | - } |
1738 | - locknum = -1; |
1739 | + sem_wait_array(sma); |
1740 | + return -1; |
1741 | } |
1742 | - return locknum; |
1743 | } |
1744 | |
1745 | static inline void sem_unlock(struct sem_array *sma, int locknum) |
1746 | { |
1747 | if (locknum == -1) { |
1748 | - spin_unlock(&sma->sem_perm.lock); |
1749 | + unmerge_queues(sma); |
1750 | + ipc_unlock_object(&sma->sem_perm); |
1751 | } else { |
1752 | struct sem *sem = sma->sem_base + locknum; |
1753 | spin_unlock(&sem->lock); |
1754 | @@ -267,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) |
1755 | } |
1756 | |
1757 | /* |
1758 | - * sem_lock_(check_) routines are called in the paths where the rw_mutex |
1759 | + * sem_lock_(check_) routines are called in the paths where the rwsem |
1760 | * is not held. |
1761 | * |
1762 | * The caller holds the RCU read lock. |
1763 | @@ -319,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns |
1764 | static inline void sem_lock_and_putref(struct sem_array *sma) |
1765 | { |
1766 | sem_lock(sma, NULL, -1); |
1767 | - ipc_rcu_putref(sma); |
1768 | -} |
1769 | - |
1770 | -static inline void sem_putref(struct sem_array *sma) |
1771 | -{ |
1772 | - ipc_rcu_putref(sma); |
1773 | + ipc_rcu_putref(sma, ipc_rcu_free); |
1774 | } |
1775 | |
1776 | static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) |
1777 | @@ -337,7 +438,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) |
1778 | * Without the check/retry algorithm a lockless wakeup is possible: |
1779 | * - queue.status is initialized to -EINTR before blocking. |
1780 | * - wakeup is performed by |
1781 | - * * unlinking the queue entry from sma->sem_pending |
1782 | + * * unlinking the queue entry from the pending list |
1783 | * * setting queue.status to IN_WAKEUP |
1784 | * This is the notification for the blocked thread that a |
1785 | * result value is imminent. |
1786 | @@ -371,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) |
1787 | * @ns: namespace |
1788 | * @params: ptr to the structure that contains key, semflg and nsems |
1789 | * |
1790 | - * Called with sem_ids.rw_mutex held (as a writer) |
1791 | + * Called with sem_ids.rwsem held (as a writer) |
1792 | */ |
1793 | |
1794 | static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
1795 | @@ -403,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
1796 | sma->sem_perm.security = NULL; |
1797 | retval = security_sem_alloc(sma); |
1798 | if (retval) { |
1799 | - ipc_rcu_putref(sma); |
1800 | + ipc_rcu_putref(sma, ipc_rcu_free); |
1801 | return retval; |
1802 | } |
1803 | |
1804 | id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); |
1805 | if (id < 0) { |
1806 | - security_sem_free(sma); |
1807 | - ipc_rcu_putref(sma); |
1808 | + ipc_rcu_putref(sma, sem_rcu_free); |
1809 | return id; |
1810 | } |
1811 | ns->used_sems += nsems; |
1812 | @@ -418,12 +518,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
1813 | sma->sem_base = (struct sem *) &sma[1]; |
1814 | |
1815 | for (i = 0; i < nsems; i++) { |
1816 | - INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); |
1817 | + INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); |
1818 | + INIT_LIST_HEAD(&sma->sem_base[i].pending_const); |
1819 | spin_lock_init(&sma->sem_base[i].lock); |
1820 | } |
1821 | |
1822 | sma->complex_count = 0; |
1823 | - INIT_LIST_HEAD(&sma->sem_pending); |
1824 | + INIT_LIST_HEAD(&sma->pending_alter); |
1825 | + INIT_LIST_HEAD(&sma->pending_const); |
1826 | INIT_LIST_HEAD(&sma->list_id); |
1827 | sma->sem_nsems = nsems; |
1828 | sma->sem_ctime = get_seconds(); |
1829 | @@ -435,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) |
1830 | |
1831 | |
1832 | /* |
1833 | - * Called with sem_ids.rw_mutex and ipcp locked. |
1834 | + * Called with sem_ids.rwsem and ipcp locked. |
1835 | */ |
1836 | static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) |
1837 | { |
1838 | @@ -446,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) |
1839 | } |
1840 | |
1841 | /* |
1842 | - * Called with sem_ids.rw_mutex and ipcp locked. |
1843 | + * Called with sem_ids.rwsem and ipcp locked. |
1844 | */ |
1845 | static inline int sem_more_checks(struct kern_ipc_perm *ipcp, |
1846 | struct ipc_params *params) |
1847 | @@ -482,12 +584,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) |
1848 | return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); |
1849 | } |
1850 | |
1851 | -/* |
1852 | - * Determine whether a sequence of semaphore operations would succeed |
1853 | - * all at once. Return 0 if yes, 1 if need to sleep, else return error code. |
1854 | +/** perform_atomic_semop - Perform (if possible) a semaphore operation |
1855 | + * @sma: semaphore array |
1856 | + * @sops: array with operations that should be checked |
1857 | + * @nsems: number of sops |
1858 | + * @un: undo array |
1859 | + * @pid: pid that did the change |
1860 | + * |
1861 | + * Returns 0 if the operation was possible. |
1862 | + * Returns 1 if the operation is impossible, the caller must sleep. |
1863 | + * Negative values are error codes. |
1864 | */ |
1865 | |
1866 | -static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, |
1867 | +static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, |
1868 | int nsops, struct sem_undo *un, int pid) |
1869 | { |
1870 | int result, sem_op; |
1871 | @@ -609,60 +718,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) |
1872 | * update_queue is O(N^2) when it restarts scanning the whole queue of |
1873 | * waiting operations. Therefore this function checks if the restart is |
1874 | * really necessary. It is called after a previously waiting operation |
1875 | - * was completed. |
1876 | + * modified the array. |
1877 | + * Note that wait-for-zero operations are handled without restart. |
1878 | */ |
1879 | static int check_restart(struct sem_array *sma, struct sem_queue *q) |
1880 | { |
1881 | - struct sem *curr; |
1882 | - struct sem_queue *h; |
1883 | - |
1884 | - /* if the operation didn't modify the array, then no restart */ |
1885 | - if (q->alter == 0) |
1886 | - return 0; |
1887 | - |
1888 | - /* pending complex operations are too difficult to analyse */ |
1889 | - if (sma->complex_count) |
1890 | + /* pending complex alter operations are too difficult to analyse */ |
1891 | + if (!list_empty(&sma->pending_alter)) |
1892 | return 1; |
1893 | |
1894 | /* we were a sleeping complex operation. Too difficult */ |
1895 | if (q->nsops > 1) |
1896 | return 1; |
1897 | |
1898 | - curr = sma->sem_base + q->sops[0].sem_num; |
1899 | + /* It is impossible that someone waits for the new value: |
1900 | + * - complex operations always restart. |
1901 | + * - wait-for-zero are handled seperately. |
1902 | + * - q is a previously sleeping simple operation that |
1903 | + * altered the array. It must be a decrement, because |
1904 | + * simple increments never sleep. |
1905 | + * - If there are older (higher priority) decrements |
1906 | + * in the queue, then they have observed the original |
1907 | + * semval value and couldn't proceed. The operation |
1908 | + * decremented to value - thus they won't proceed either. |
1909 | + */ |
1910 | + return 0; |
1911 | +} |
1912 | |
1913 | - /* No-one waits on this queue */ |
1914 | - if (list_empty(&curr->sem_pending)) |
1915 | - return 0; |
1916 | +/** |
1917 | + * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks |
1918 | + * @sma: semaphore array. |
1919 | + * @semnum: semaphore that was modified. |
1920 | + * @pt: list head for the tasks that must be woken up. |
1921 | + * |
1922 | + * wake_const_ops must be called after a semaphore in a semaphore array |
1923 | + * was set to 0. If complex const operations are pending, wake_const_ops must |
1924 | + * be called with semnum = -1, as well as with the number of each modified |
1925 | + * semaphore. |
1926 | + * The tasks that must be woken up are added to @pt. The return code |
1927 | + * is stored in q->pid. |
1928 | + * The function returns 1 if at least one operation was completed successfully. |
1929 | + */ |
1930 | +static int wake_const_ops(struct sem_array *sma, int semnum, |
1931 | + struct list_head *pt) |
1932 | +{ |
1933 | + struct sem_queue *q; |
1934 | + struct list_head *walk; |
1935 | + struct list_head *pending_list; |
1936 | + int semop_completed = 0; |
1937 | |
1938 | - /* the new semaphore value */ |
1939 | - if (curr->semval) { |
1940 | - /* It is impossible that someone waits for the new value: |
1941 | - * - q is a previously sleeping simple operation that |
1942 | - * altered the array. It must be a decrement, because |
1943 | - * simple increments never sleep. |
1944 | - * - The value is not 0, thus wait-for-zero won't proceed. |
1945 | - * - If there are older (higher priority) decrements |
1946 | - * in the queue, then they have observed the original |
1947 | - * semval value and couldn't proceed. The operation |
1948 | - * decremented to value - thus they won't proceed either. |
1949 | + if (semnum == -1) |
1950 | + pending_list = &sma->pending_const; |
1951 | + else |
1952 | + pending_list = &sma->sem_base[semnum].pending_const; |
1953 | + |
1954 | + walk = pending_list->next; |
1955 | + while (walk != pending_list) { |
1956 | + int error; |
1957 | + |
1958 | + q = container_of(walk, struct sem_queue, list); |
1959 | + walk = walk->next; |
1960 | + |
1961 | + error = perform_atomic_semop(sma, q->sops, q->nsops, |
1962 | + q->undo, q->pid); |
1963 | + |
1964 | + if (error <= 0) { |
1965 | + /* operation completed, remove from queue & wakeup */ |
1966 | + |
1967 | + unlink_queue(sma, q); |
1968 | + |
1969 | + wake_up_sem_queue_prepare(pt, q, error); |
1970 | + if (error == 0) |
1971 | + semop_completed = 1; |
1972 | + } |
1973 | + } |
1974 | + return semop_completed; |
1975 | +} |
1976 | + |
1977 | +/** |
1978 | + * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks |
1979 | + * @sma: semaphore array |
1980 | + * @sops: operations that were performed |
1981 | + * @nsops: number of operations |
1982 | + * @pt: list head of the tasks that must be woken up. |
1983 | + * |
1984 | + * do_smart_wakeup_zero() checks all required queue for wait-for-zero |
1985 | + * operations, based on the actual changes that were performed on the |
1986 | + * semaphore array. |
1987 | + * The function returns 1 if at least one operation was completed successfully. |
1988 | + */ |
1989 | +static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, |
1990 | + int nsops, struct list_head *pt) |
1991 | +{ |
1992 | + int i; |
1993 | + int semop_completed = 0; |
1994 | + int got_zero = 0; |
1995 | + |
1996 | + /* first: the per-semaphore queues, if known */ |
1997 | + if (sops) { |
1998 | + for (i = 0; i < nsops; i++) { |
1999 | + int num = sops[i].sem_num; |
2000 | + |
2001 | + if (sma->sem_base[num].semval == 0) { |
2002 | + got_zero = 1; |
2003 | + semop_completed |= wake_const_ops(sma, num, pt); |
2004 | + } |
2005 | + } |
2006 | + } else { |
2007 | + /* |
2008 | + * No sops means modified semaphores not known. |
2009 | + * Assume all were changed. |
2010 | */ |
2011 | - BUG_ON(q->sops[0].sem_op >= 0); |
2012 | - return 0; |
2013 | + for (i = 0; i < sma->sem_nsems; i++) { |
2014 | + if (sma->sem_base[i].semval == 0) { |
2015 | + got_zero = 1; |
2016 | + semop_completed |= wake_const_ops(sma, i, pt); |
2017 | + } |
2018 | + } |
2019 | } |
2020 | /* |
2021 | - * semval is 0. Check if there are wait-for-zero semops. |
2022 | - * They must be the first entries in the per-semaphore queue |
2023 | + * If one of the modified semaphores got 0, |
2024 | + * then check the global queue, too. |
2025 | */ |
2026 | - h = list_first_entry(&curr->sem_pending, struct sem_queue, list); |
2027 | - BUG_ON(h->nsops != 1); |
2028 | - BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); |
2029 | + if (got_zero) |
2030 | + semop_completed |= wake_const_ops(sma, -1, pt); |
2031 | |
2032 | - /* Yes, there is a wait-for-zero semop. Restart */ |
2033 | - if (h->sops[0].sem_op == 0) |
2034 | - return 1; |
2035 | - |
2036 | - /* Again - no-one is waiting for the new value. */ |
2037 | - return 0; |
2038 | + return semop_completed; |
2039 | } |
2040 | |
2041 | |
2042 | @@ -678,6 +859,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) |
2043 | * semaphore. |
2044 | * The tasks that must be woken up are added to @pt. The return code |
2045 | * is stored in q->pid. |
2046 | + * The function internally checks if const operations can now succeed. |
2047 | + * |
2048 | * The function return 1 if at least one semop was completed successfully. |
2049 | */ |
2050 | static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) |
2051 | @@ -688,9 +871,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) |
2052 | int semop_completed = 0; |
2053 | |
2054 | if (semnum == -1) |
2055 | - pending_list = &sma->sem_pending; |
2056 | + pending_list = &sma->pending_alter; |
2057 | else |
2058 | - pending_list = &sma->sem_base[semnum].sem_pending; |
2059 | + pending_list = &sma->sem_base[semnum].pending_alter; |
2060 | |
2061 | again: |
2062 | walk = pending_list->next; |
2063 | @@ -702,16 +885,15 @@ again: |
2064 | |
2065 | /* If we are scanning the single sop, per-semaphore list of |
2066 | * one semaphore and that semaphore is 0, then it is not |
2067 | - * necessary to scan the "alter" entries: simple increments |
2068 | + * necessary to scan further: simple increments |
2069 | * that affect only one entry succeed immediately and cannot |
2070 | * be in the per semaphore pending queue, and decrements |
2071 | * cannot be successful if the value is already 0. |
2072 | */ |
2073 | - if (semnum != -1 && sma->sem_base[semnum].semval == 0 && |
2074 | - q->alter) |
2075 | + if (semnum != -1 && sma->sem_base[semnum].semval == 0) |
2076 | break; |
2077 | |
2078 | - error = try_atomic_semop(sma, q->sops, q->nsops, |
2079 | + error = perform_atomic_semop(sma, q->sops, q->nsops, |
2080 | q->undo, q->pid); |
2081 | |
2082 | /* Does q->sleeper still need to sleep? */ |
2083 | @@ -724,6 +906,7 @@ again: |
2084 | restart = 0; |
2085 | } else { |
2086 | semop_completed = 1; |
2087 | + do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); |
2088 | restart = check_restart(sma, q); |
2089 | } |
2090 | |
2091 | @@ -735,6 +918,24 @@ again: |
2092 | } |
2093 | |
2094 | /** |
2095 | + * set_semotime(sma, sops) - set sem_otime |
2096 | + * @sma: semaphore array |
2097 | + * @sops: operations that modified the array, may be NULL |
2098 | + * |
2099 | + * sem_otime is replicated to avoid cache line trashing. |
2100 | + * This function sets one instance to the current time. |
2101 | + */ |
2102 | +static void set_semotime(struct sem_array *sma, struct sembuf *sops) |
2103 | +{ |
2104 | + if (sops == NULL) { |
2105 | + sma->sem_base[0].sem_otime = get_seconds(); |
2106 | + } else { |
2107 | + sma->sem_base[sops[0].sem_num].sem_otime = |
2108 | + get_seconds(); |
2109 | + } |
2110 | +} |
2111 | + |
2112 | +/** |
2113 | * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue |
2114 | * @sma: semaphore array |
2115 | * @sops: operations that were performed |
2116 | @@ -742,8 +943,8 @@ again: |
2117 | * @otime: force setting otime |
2118 | * @pt: list head of the tasks that must be woken up. |
2119 | * |
2120 | - * do_smart_update() does the required called to update_queue, based on the |
2121 | - * actual changes that were performed on the semaphore array. |
2122 | + * do_smart_update() does the required calls to update_queue and wakeup_zero, |
2123 | + * based on the actual changes that were performed on the semaphore array. |
2124 | * Note that the function does not do the actual wake-up: the caller is |
2125 | * responsible for calling wake_up_sem_queue_do(@pt). |
2126 | * It is safe to perform this call after dropping all locks. |
2127 | @@ -752,52 +953,42 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop |
2128 | int otime, struct list_head *pt) |
2129 | { |
2130 | int i; |
2131 | - int progress; |
2132 | - |
2133 | - progress = 1; |
2134 | -retry_global: |
2135 | - if (sma->complex_count) { |
2136 | - if (update_queue(sma, -1, pt)) { |
2137 | - progress = 1; |
2138 | - otime = 1; |
2139 | - sops = NULL; |
2140 | - } |
2141 | - } |
2142 | - if (!progress) |
2143 | - goto done; |
2144 | |
2145 | - if (!sops) { |
2146 | - /* No semops; something special is going on. */ |
2147 | - for (i = 0; i < sma->sem_nsems; i++) { |
2148 | - if (update_queue(sma, i, pt)) { |
2149 | - otime = 1; |
2150 | - progress = 1; |
2151 | - } |
2152 | - } |
2153 | - goto done_checkretry; |
2154 | - } |
2155 | + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); |
2156 | |
2157 | - /* Check the semaphores that were modified. */ |
2158 | - for (i = 0; i < nsops; i++) { |
2159 | - if (sops[i].sem_op > 0 || |
2160 | - (sops[i].sem_op < 0 && |
2161 | - sma->sem_base[sops[i].sem_num].semval == 0)) |
2162 | - if (update_queue(sma, sops[i].sem_num, pt)) { |
2163 | - otime = 1; |
2164 | - progress = 1; |
2165 | + if (!list_empty(&sma->pending_alter)) { |
2166 | + /* semaphore array uses the global queue - just process it. */ |
2167 | + otime |= update_queue(sma, -1, pt); |
2168 | + } else { |
2169 | + if (!sops) { |
2170 | + /* |
2171 | + * No sops, thus the modified semaphores are not |
2172 | + * known. Check all. |
2173 | + */ |
2174 | + for (i = 0; i < sma->sem_nsems; i++) |
2175 | + otime |= update_queue(sma, i, pt); |
2176 | + } else { |
2177 | + /* |
2178 | + * Check the semaphores that were increased: |
2179 | + * - No complex ops, thus all sleeping ops are |
2180 | + * decrease. |
2181 | + * - if we decreased the value, then any sleeping |
2182 | + * semaphore ops wont be able to run: If the |
2183 | + * previous value was too small, then the new |
2184 | + * value will be too small, too. |
2185 | + */ |
2186 | + for (i = 0; i < nsops; i++) { |
2187 | + if (sops[i].sem_op > 0) { |
2188 | + otime |= update_queue(sma, |
2189 | + sops[i].sem_num, pt); |
2190 | + } |
2191 | } |
2192 | + } |
2193 | } |
2194 | -done_checkretry: |
2195 | - if (progress) { |
2196 | - progress = 0; |
2197 | - goto retry_global; |
2198 | - } |
2199 | -done: |
2200 | if (otime) |
2201 | - sma->sem_otime = get_seconds(); |
2202 | + set_semotime(sma, sops); |
2203 | } |
2204 | |
2205 | - |
2206 | /* The following counts are associated to each semaphore: |
2207 | * semncnt number of tasks waiting on semval being nonzero |
2208 | * semzcnt number of tasks waiting on semval being zero |
2209 | @@ -813,14 +1004,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) |
2210 | struct sem_queue * q; |
2211 | |
2212 | semncnt = 0; |
2213 | - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { |
2214 | + list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { |
2215 | struct sembuf * sops = q->sops; |
2216 | BUG_ON(sops->sem_num != semnum); |
2217 | if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) |
2218 | semncnt++; |
2219 | } |
2220 | |
2221 | - list_for_each_entry(q, &sma->sem_pending, list) { |
2222 | + list_for_each_entry(q, &sma->pending_alter, list) { |
2223 | struct sembuf * sops = q->sops; |
2224 | int nsops = q->nsops; |
2225 | int i; |
2226 | @@ -839,14 +1030,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) |
2227 | struct sem_queue * q; |
2228 | |
2229 | semzcnt = 0; |
2230 | - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { |
2231 | + list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { |
2232 | struct sembuf * sops = q->sops; |
2233 | BUG_ON(sops->sem_num != semnum); |
2234 | if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) |
2235 | semzcnt++; |
2236 | } |
2237 | |
2238 | - list_for_each_entry(q, &sma->sem_pending, list) { |
2239 | + list_for_each_entry(q, &sma->pending_const, list) { |
2240 | struct sembuf * sops = q->sops; |
2241 | int nsops = q->nsops; |
2242 | int i; |
2243 | @@ -859,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) |
2244 | return semzcnt; |
2245 | } |
2246 | |
2247 | -/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked |
2248 | - * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex |
2249 | +/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked |
2250 | + * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem |
2251 | * remains locked on exit. |
2252 | */ |
2253 | static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
2254 | @@ -872,7 +1063,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
2255 | int i; |
2256 | |
2257 | /* Free the existing undo structures for this semaphore set. */ |
2258 | - assert_spin_locked(&sma->sem_perm.lock); |
2259 | + ipc_assert_locked_object(&sma->sem_perm); |
2260 | list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { |
2261 | list_del(&un->list_id); |
2262 | spin_lock(&un->ulp->lock); |
2263 | @@ -884,13 +1075,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
2264 | |
2265 | /* Wake up all pending processes and let them fail with EIDRM. */ |
2266 | INIT_LIST_HEAD(&tasks); |
2267 | - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { |
2268 | + list_for_each_entry_safe(q, tq, &sma->pending_const, list) { |
2269 | + unlink_queue(sma, q); |
2270 | + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); |
2271 | + } |
2272 | + |
2273 | + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { |
2274 | unlink_queue(sma, q); |
2275 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); |
2276 | } |
2277 | for (i = 0; i < sma->sem_nsems; i++) { |
2278 | struct sem *sem = sma->sem_base + i; |
2279 | - list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { |
2280 | + list_for_each_entry_safe(q, tq, &sem->pending_const, list) { |
2281 | + unlink_queue(sma, q); |
2282 | + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); |
2283 | + } |
2284 | + list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { |
2285 | unlink_queue(sma, q); |
2286 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); |
2287 | } |
2288 | @@ -903,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
2289 | |
2290 | wake_up_sem_queue_do(&tasks); |
2291 | ns->used_sems -= sma->sem_nsems; |
2292 | - security_sem_free(sma); |
2293 | - ipc_rcu_putref(sma); |
2294 | + ipc_rcu_putref(sma, sem_rcu_free); |
2295 | } |
2296 | |
2297 | static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) |
2298 | @@ -931,6 +1130,21 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, |
2299 | } |
2300 | } |
2301 | |
2302 | +static time_t get_semotime(struct sem_array *sma) |
2303 | +{ |
2304 | + int i; |
2305 | + time_t res; |
2306 | + |
2307 | + res = sma->sem_base[0].sem_otime; |
2308 | + for (i = 1; i < sma->sem_nsems; i++) { |
2309 | + time_t to = sma->sem_base[i].sem_otime; |
2310 | + |
2311 | + if (to > res) |
2312 | + res = to; |
2313 | + } |
2314 | + return res; |
2315 | +} |
2316 | + |
2317 | static int semctl_nolock(struct ipc_namespace *ns, int semid, |
2318 | int cmd, int version, void __user *p) |
2319 | { |
2320 | @@ -957,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, |
2321 | seminfo.semmnu = SEMMNU; |
2322 | seminfo.semmap = SEMMAP; |
2323 | seminfo.semume = SEMUME; |
2324 | - down_read(&sem_ids(ns).rw_mutex); |
2325 | + down_read(&sem_ids(ns).rwsem); |
2326 | if (cmd == SEM_INFO) { |
2327 | seminfo.semusz = sem_ids(ns).in_use; |
2328 | seminfo.semaem = ns->used_sems; |
2329 | @@ -966,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, |
2330 | seminfo.semaem = SEMAEM; |
2331 | } |
2332 | max_id = ipc_get_maxid(&sem_ids(ns)); |
2333 | - up_read(&sem_ids(ns).rw_mutex); |
2334 | + up_read(&sem_ids(ns).rwsem); |
2335 | if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) |
2336 | return -EFAULT; |
2337 | return (max_id < 0) ? 0: max_id; |
2338 | @@ -1004,9 +1218,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, |
2339 | goto out_unlock; |
2340 | |
2341 | kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); |
2342 | - tbuf.sem_otime = sma->sem_otime; |
2343 | - tbuf.sem_ctime = sma->sem_ctime; |
2344 | - tbuf.sem_nsems = sma->sem_nsems; |
2345 | + tbuf.sem_otime = get_semotime(sma); |
2346 | + tbuf.sem_ctime = sma->sem_ctime; |
2347 | + tbuf.sem_nsems = sma->sem_nsems; |
2348 | rcu_read_unlock(); |
2349 | if (copy_semid_to_user(p, &tbuf, version)) |
2350 | return -EFAULT; |
2351 | @@ -1070,7 +1284,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, |
2352 | |
2353 | curr = &sma->sem_base[semnum]; |
2354 | |
2355 | - assert_spin_locked(&sma->sem_perm.lock); |
2356 | + ipc_assert_locked_object(&sma->sem_perm); |
2357 | list_for_each_entry(un, &sma->list_id, list_id) |
2358 | un->semadj[semnum] = 0; |
2359 | |
2360 | @@ -1133,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, |
2361 | rcu_read_unlock(); |
2362 | sem_io = ipc_alloc(sizeof(ushort)*nsems); |
2363 | if(sem_io == NULL) { |
2364 | - sem_putref(sma); |
2365 | + ipc_rcu_putref(sma, ipc_rcu_free); |
2366 | return -ENOMEM; |
2367 | } |
2368 | |
2369 | @@ -1169,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, |
2370 | if(nsems > SEMMSL_FAST) { |
2371 | sem_io = ipc_alloc(sizeof(ushort)*nsems); |
2372 | if(sem_io == NULL) { |
2373 | - sem_putref(sma); |
2374 | + ipc_rcu_putref(sma, ipc_rcu_free); |
2375 | return -ENOMEM; |
2376 | } |
2377 | } |
2378 | |
2379 | if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { |
2380 | - sem_putref(sma); |
2381 | + ipc_rcu_putref(sma, ipc_rcu_free); |
2382 | err = -EFAULT; |
2383 | goto out_free; |
2384 | } |
2385 | |
2386 | for (i = 0; i < nsems; i++) { |
2387 | if (sem_io[i] > SEMVMX) { |
2388 | - sem_putref(sma); |
2389 | + ipc_rcu_putref(sma, ipc_rcu_free); |
2390 | err = -ERANGE; |
2391 | goto out_free; |
2392 | } |
2393 | @@ -1199,7 +1413,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, |
2394 | for (i = 0; i < nsems; i++) |
2395 | sma->sem_base[i].semval = sem_io[i]; |
2396 | |
2397 | - assert_spin_locked(&sma->sem_perm.lock); |
2398 | + ipc_assert_locked_object(&sma->sem_perm); |
2399 | list_for_each_entry(un, &sma->list_id, list_id) { |
2400 | for (i = 0; i < nsems; i++) |
2401 | un->semadj[i] = 0; |
2402 | @@ -1272,9 +1486,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) |
2403 | } |
2404 | |
2405 | /* |
2406 | - * This function handles some semctl commands which require the rw_mutex |
2407 | + * This function handles some semctl commands which require the rwsem |
2408 | * to be held in write mode. |
2409 | - * NOTE: no locks must be held, the rw_mutex is taken inside this function. |
2410 | + * NOTE: no locks must be held, the rwsem is taken inside this function. |
2411 | */ |
2412 | static int semctl_down(struct ipc_namespace *ns, int semid, |
2413 | int cmd, int version, void __user *p) |
2414 | @@ -1289,42 +1503,46 @@ static int semctl_down(struct ipc_namespace *ns, int semid, |
2415 | return -EFAULT; |
2416 | } |
2417 | |
2418 | + down_write(&sem_ids(ns).rwsem); |
2419 | + rcu_read_lock(); |
2420 | + |
2421 | ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, |
2422 | &semid64.sem_perm, 0); |
2423 | - if (IS_ERR(ipcp)) |
2424 | - return PTR_ERR(ipcp); |
2425 | + if (IS_ERR(ipcp)) { |
2426 | + err = PTR_ERR(ipcp); |
2427 | + goto out_unlock1; |
2428 | + } |
2429 | |
2430 | sma = container_of(ipcp, struct sem_array, sem_perm); |
2431 | |
2432 | err = security_sem_semctl(sma, cmd); |
2433 | - if (err) { |
2434 | - rcu_read_unlock(); |
2435 | - goto out_up; |
2436 | - } |
2437 | + if (err) |
2438 | + goto out_unlock1; |
2439 | |
2440 | - switch(cmd){ |
2441 | + switch (cmd) { |
2442 | case IPC_RMID: |
2443 | sem_lock(sma, NULL, -1); |
2444 | + /* freeary unlocks the ipc object and rcu */ |
2445 | freeary(ns, ipcp); |
2446 | goto out_up; |
2447 | case IPC_SET: |
2448 | sem_lock(sma, NULL, -1); |
2449 | err = ipc_update_perm(&semid64.sem_perm, ipcp); |
2450 | if (err) |
2451 | - goto out_unlock; |
2452 | + goto out_unlock0; |
2453 | sma->sem_ctime = get_seconds(); |
2454 | break; |
2455 | default: |
2456 | - rcu_read_unlock(); |
2457 | err = -EINVAL; |
2458 | - goto out_up; |
2459 | + goto out_unlock1; |
2460 | } |
2461 | |
2462 | -out_unlock: |
2463 | +out_unlock0: |
2464 | sem_unlock(sma, -1); |
2465 | +out_unlock1: |
2466 | rcu_read_unlock(); |
2467 | out_up: |
2468 | - up_write(&sem_ids(ns).rw_mutex); |
2469 | + up_write(&sem_ids(ns).rwsem); |
2470 | return err; |
2471 | } |
2472 | |
2473 | @@ -1466,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) |
2474 | /* step 2: allocate new undo structure */ |
2475 | new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); |
2476 | if (!new) { |
2477 | - sem_putref(sma); |
2478 | + ipc_rcu_putref(sma, ipc_rcu_free); |
2479 | return ERR_PTR(-ENOMEM); |
2480 | } |
2481 | |
2482 | @@ -1496,7 +1714,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) |
2483 | new->semid = semid; |
2484 | assert_spin_locked(&ulp->lock); |
2485 | list_add_rcu(&new->list_proc, &ulp->list_proc); |
2486 | - assert_spin_locked(&sma->sem_perm.lock); |
2487 | + ipc_assert_locked_object(&sma->sem_perm); |
2488 | list_add(&new->list_id, &sma->list_id); |
2489 | un = new; |
2490 | |
2491 | @@ -1533,7 +1751,6 @@ static int get_queue_result(struct sem_queue *q) |
2492 | return error; |
2493 | } |
2494 | |
2495 | - |
2496 | SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, |
2497 | unsigned, nsops, const struct timespec __user *, timeout) |
2498 | { |
2499 | @@ -1631,13 +1848,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, |
2500 | if (un && un->semid == -1) |
2501 | goto out_unlock_free; |
2502 | |
2503 | - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); |
2504 | - if (error <= 0) { |
2505 | - if (alter && error == 0) |
2506 | + error = perform_atomic_semop(sma, sops, nsops, un, |
2507 | + task_tgid_vnr(current)); |
2508 | + if (error == 0) { |
2509 | + /* If the operation was successful, then do |
2510 | + * the required updates. |
2511 | + */ |
2512 | + if (alter) |
2513 | do_smart_update(sma, sops, nsops, 1, &tasks); |
2514 | - |
2515 | - goto out_unlock_free; |
2516 | + else |
2517 | + set_semotime(sma, sops); |
2518 | } |
2519 | + if (error <= 0) |
2520 | + goto out_unlock_free; |
2521 | |
2522 | /* We need to sleep on this operation, so we put the current |
2523 | * task into the pending queue and go to sleep. |
2524 | @@ -1653,15 +1876,27 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, |
2525 | struct sem *curr; |
2526 | curr = &sma->sem_base[sops->sem_num]; |
2527 | |
2528 | - if (alter) |
2529 | - list_add_tail(&queue.list, &curr->sem_pending); |
2530 | - else |
2531 | - list_add(&queue.list, &curr->sem_pending); |
2532 | + if (alter) { |
2533 | + if (sma->complex_count) { |
2534 | + list_add_tail(&queue.list, |
2535 | + &sma->pending_alter); |
2536 | + } else { |
2537 | + |
2538 | + list_add_tail(&queue.list, |
2539 | + &curr->pending_alter); |
2540 | + } |
2541 | + } else { |
2542 | + list_add_tail(&queue.list, &curr->pending_const); |
2543 | + } |
2544 | } else { |
2545 | + if (!sma->complex_count) |
2546 | + merge_queues(sma); |
2547 | + |
2548 | if (alter) |
2549 | - list_add_tail(&queue.list, &sma->sem_pending); |
2550 | + list_add_tail(&queue.list, &sma->pending_alter); |
2551 | else |
2552 | - list_add(&queue.list, &sma->sem_pending); |
2553 | + list_add_tail(&queue.list, &sma->pending_const); |
2554 | + |
2555 | sma->complex_count++; |
2556 | } |
2557 | |
2558 | @@ -1833,7 +2068,7 @@ void exit_sem(struct task_struct *tsk) |
2559 | } |
2560 | |
2561 | /* remove un from the linked lists */ |
2562 | - assert_spin_locked(&sma->sem_perm.lock); |
2563 | + ipc_assert_locked_object(&sma->sem_perm); |
2564 | list_del(&un->list_id); |
2565 | |
2566 | spin_lock(&ulp->lock); |
2567 | @@ -1882,6 +2117,17 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) |
2568 | { |
2569 | struct user_namespace *user_ns = seq_user_ns(s); |
2570 | struct sem_array *sma = it; |
2571 | + time_t sem_otime; |
2572 | + |
2573 | + /* |
2574 | + * The proc interface isn't aware of sem_lock(), it calls |
2575 | + * ipc_lock_object() directly (in sysvipc_find_ipc). |
2576 | + * In order to stay compatible with sem_lock(), we must wait until |
2577 | + * all simple semop() calls have left their critical regions. |
2578 | + */ |
2579 | + sem_wait_array(sma); |
2580 | + |
2581 | + sem_otime = get_semotime(sma); |
2582 | |
2583 | return seq_printf(s, |
2584 | "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", |
2585 | @@ -1893,7 +2139,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) |
2586 | from_kgid_munged(user_ns, sma->sem_perm.gid), |
2587 | from_kuid_munged(user_ns, sma->sem_perm.cuid), |
2588 | from_kgid_munged(user_ns, sma->sem_perm.cgid), |
2589 | - sma->sem_otime, |
2590 | + sem_otime, |
2591 | sma->sem_ctime); |
2592 | } |
2593 | #endif |
2594 | diff --git a/ipc/shm.c b/ipc/shm.c |
2595 | index 7e199fa..7b87bea 100644 |
2596 | --- a/ipc/shm.c |
2597 | +++ b/ipc/shm.c |
2598 | @@ -19,6 +19,9 @@ |
2599 | * namespaces support |
2600 | * OpenVZ, SWsoft Inc. |
2601 | * Pavel Emelianov <xemul@openvz.org> |
2602 | + * |
2603 | + * Better ipc lock (kern_ipc_perm.lock) handling |
2604 | + * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. |
2605 | */ |
2606 | |
2607 | #include <linux/slab.h> |
2608 | @@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns) |
2609 | } |
2610 | |
2611 | /* |
2612 | - * Called with shm_ids.rw_mutex (writer) and the shp structure locked. |
2613 | - * Only shm_ids.rw_mutex remains locked on exit. |
2614 | + * Called with shm_ids.rwsem (writer) and the shp structure locked. |
2615 | + * Only shm_ids.rwsem remains locked on exit. |
2616 | */ |
2617 | static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) |
2618 | { |
2619 | @@ -124,8 +127,28 @@ void __init shm_init (void) |
2620 | IPC_SHM_IDS, sysvipc_shm_proc_show); |
2621 | } |
2622 | |
2623 | +static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) |
2624 | +{ |
2625 | + struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); |
2626 | + |
2627 | + if (IS_ERR(ipcp)) |
2628 | + return ERR_CAST(ipcp); |
2629 | + |
2630 | + return container_of(ipcp, struct shmid_kernel, shm_perm); |
2631 | +} |
2632 | + |
2633 | +static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) |
2634 | +{ |
2635 | + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); |
2636 | + |
2637 | + if (IS_ERR(ipcp)) |
2638 | + return ERR_CAST(ipcp); |
2639 | + |
2640 | + return container_of(ipcp, struct shmid_kernel, shm_perm); |
2641 | +} |
2642 | + |
2643 | /* |
2644 | - * shm_lock_(check_) routines are called in the paths where the rw_mutex |
2645 | + * shm_lock_(check_) routines are called in the paths where the rwsem |
2646 | * is not necessarily held. |
2647 | */ |
2648 | static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) |
2649 | @@ -141,18 +164,16 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) |
2650 | static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) |
2651 | { |
2652 | rcu_read_lock(); |
2653 | - spin_lock(&ipcp->shm_perm.lock); |
2654 | + ipc_lock_object(&ipcp->shm_perm); |
2655 | } |
2656 | |
2657 | -static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, |
2658 | - int id) |
2659 | +static void shm_rcu_free(struct rcu_head *head) |
2660 | { |
2661 | - struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); |
2662 | - |
2663 | - if (IS_ERR(ipcp)) |
2664 | - return (struct shmid_kernel *)ipcp; |
2665 | + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); |
2666 | + struct shmid_kernel *shp = ipc_rcu_to_struct(p); |
2667 | |
2668 | - return container_of(ipcp, struct shmid_kernel, shm_perm); |
2669 | + security_shm_free(shp); |
2670 | + ipc_rcu_free(head); |
2671 | } |
2672 | |
2673 | static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) |
2674 | @@ -182,7 +203,7 @@ static void shm_open(struct vm_area_struct *vma) |
2675 | * @ns: namespace |
2676 | * @shp: struct to free |
2677 | * |
2678 | - * It has to be called with shp and shm_ids.rw_mutex (writer) locked, |
2679 | + * It has to be called with shp and shm_ids.rwsem (writer) locked, |
2680 | * but returns with shp unlocked and freed. |
2681 | */ |
2682 | static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) |
2683 | @@ -196,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) |
2684 | user_shm_unlock(file_inode(shp->shm_file)->i_size, |
2685 | shp->mlock_user); |
2686 | fput (shp->shm_file); |
2687 | - security_shm_free(shp); |
2688 | - ipc_rcu_putref(shp); |
2689 | + ipc_rcu_putref(shp, shm_rcu_free); |
2690 | } |
2691 | |
2692 | /* |
2693 | @@ -230,7 +250,7 @@ static void shm_close(struct vm_area_struct *vma) |
2694 | struct shmid_kernel *shp; |
2695 | struct ipc_namespace *ns = sfd->ns; |
2696 | |
2697 | - down_write(&shm_ids(ns).rw_mutex); |
2698 | + down_write(&shm_ids(ns).rwsem); |
2699 | /* remove from the list of attaches of the shm segment */ |
2700 | shp = shm_lock(ns, sfd->id); |
2701 | BUG_ON(IS_ERR(shp)); |
2702 | @@ -241,10 +261,10 @@ static void shm_close(struct vm_area_struct *vma) |
2703 | shm_destroy(ns, shp); |
2704 | else |
2705 | shm_unlock(shp); |
2706 | - up_write(&shm_ids(ns).rw_mutex); |
2707 | + up_write(&shm_ids(ns).rwsem); |
2708 | } |
2709 | |
2710 | -/* Called with ns->shm_ids(ns).rw_mutex locked */ |
2711 | +/* Called with ns->shm_ids(ns).rwsem locked */ |
2712 | static int shm_try_destroy_current(int id, void *p, void *data) |
2713 | { |
2714 | struct ipc_namespace *ns = data; |
2715 | @@ -275,7 +295,7 @@ static int shm_try_destroy_current(int id, void *p, void *data) |
2716 | return 0; |
2717 | } |
2718 | |
2719 | -/* Called with ns->shm_ids(ns).rw_mutex locked */ |
2720 | +/* Called with ns->shm_ids(ns).rwsem locked */ |
2721 | static int shm_try_destroy_orphaned(int id, void *p, void *data) |
2722 | { |
2723 | struct ipc_namespace *ns = data; |
2724 | @@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) |
2725 | * We want to destroy segments without users and with already |
2726 | * exit'ed originating process. |
2727 | * |
2728 | - * As shp->* are changed under rw_mutex, it's safe to skip shp locking. |
2729 | + * As shp->* are changed under rwsem, it's safe to skip shp locking. |
2730 | */ |
2731 | if (shp->shm_creator != NULL) |
2732 | return 0; |
2733 | @@ -300,10 +320,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) |
2734 | |
2735 | void shm_destroy_orphaned(struct ipc_namespace *ns) |
2736 | { |
2737 | - down_write(&shm_ids(ns).rw_mutex); |
2738 | + down_write(&shm_ids(ns).rwsem); |
2739 | if (shm_ids(ns).in_use) |
2740 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); |
2741 | - up_write(&shm_ids(ns).rw_mutex); |
2742 | + up_write(&shm_ids(ns).rwsem); |
2743 | } |
2744 | |
2745 | |
2746 | @@ -315,10 +335,10 @@ void exit_shm(struct task_struct *task) |
2747 | return; |
2748 | |
2749 | /* Destroy all already created segments, but not mapped yet */ |
2750 | - down_write(&shm_ids(ns).rw_mutex); |
2751 | + down_write(&shm_ids(ns).rwsem); |
2752 | if (shm_ids(ns).in_use) |
2753 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); |
2754 | - up_write(&shm_ids(ns).rw_mutex); |
2755 | + up_write(&shm_ids(ns).rwsem); |
2756 | } |
2757 | |
2758 | static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
2759 | @@ -452,7 +472,7 @@ static const struct vm_operations_struct shm_vm_ops = { |
2760 | * @ns: namespace |
2761 | * @params: ptr to the structure that contains key, size and shmflg |
2762 | * |
2763 | - * Called with shm_ids.rw_mutex held as a writer. |
2764 | + * Called with shm_ids.rwsem held as a writer. |
2765 | */ |
2766 | |
2767 | static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
2768 | @@ -485,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
2769 | shp->shm_perm.security = NULL; |
2770 | error = security_shm_alloc(shp); |
2771 | if (error) { |
2772 | - ipc_rcu_putref(shp); |
2773 | + ipc_rcu_putref(shp, ipc_rcu_free); |
2774 | return error; |
2775 | } |
2776 | |
2777 | @@ -535,6 +555,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
2778 | shp->shm_nattch = 0; |
2779 | shp->shm_file = file; |
2780 | shp->shm_creator = current; |
2781 | + |
2782 | /* |
2783 | * shmid gets reported as "inode#" in /proc/pid/maps. |
2784 | * proc-ps tools use this. Changing this will break them. |
2785 | @@ -543,7 +564,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) |
2786 | |
2787 | ns->shm_tot += numpages; |
2788 | error = shp->shm_perm.id; |
2789 | - shm_unlock(shp); |
2790 | + |
2791 | + ipc_unlock_object(&shp->shm_perm); |
2792 | + rcu_read_unlock(); |
2793 | return error; |
2794 | |
2795 | no_id: |
2796 | @@ -551,13 +574,12 @@ no_id: |
2797 | user_shm_unlock(size, shp->mlock_user); |
2798 | fput(file); |
2799 | no_file: |
2800 | - security_shm_free(shp); |
2801 | - ipc_rcu_putref(shp); |
2802 | + ipc_rcu_putref(shp, shm_rcu_free); |
2803 | return error; |
2804 | } |
2805 | |
2806 | /* |
2807 | - * Called with shm_ids.rw_mutex and ipcp locked. |
2808 | + * Called with shm_ids.rwsem and ipcp locked. |
2809 | */ |
2810 | static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) |
2811 | { |
2812 | @@ -568,7 +590,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) |
2813 | } |
2814 | |
2815 | /* |
2816 | - * Called with shm_ids.rw_mutex and ipcp locked. |
2817 | + * Called with shm_ids.rwsem and ipcp locked. |
2818 | */ |
2819 | static inline int shm_more_checks(struct kern_ipc_perm *ipcp, |
2820 | struct ipc_params *params) |
2821 | @@ -681,7 +703,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf |
2822 | |
2823 | /* |
2824 | * Calculate and add used RSS and swap pages of a shm. |
2825 | - * Called with shm_ids.rw_mutex held as a reader |
2826 | + * Called with shm_ids.rwsem held as a reader |
2827 | */ |
2828 | static void shm_add_rss_swap(struct shmid_kernel *shp, |
2829 | unsigned long *rss_add, unsigned long *swp_add) |
2830 | @@ -708,7 +730,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, |
2831 | } |
2832 | |
2833 | /* |
2834 | - * Called with shm_ids.rw_mutex held as a reader |
2835 | + * Called with shm_ids.rwsem held as a reader |
2836 | */ |
2837 | static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, |
2838 | unsigned long *swp) |
2839 | @@ -737,9 +759,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, |
2840 | } |
2841 | |
2842 | /* |
2843 | - * This function handles some shmctl commands which require the rw_mutex |
2844 | + * This function handles some shmctl commands which require the rwsem |
2845 | * to be held in write mode. |
2846 | - * NOTE: no locks must be held, the rw_mutex is taken inside this function. |
2847 | + * NOTE: no locks must be held, the rwsem is taken inside this function. |
2848 | */ |
2849 | static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, |
2850 | struct shmid_ds __user *buf, int version) |
2851 | @@ -754,59 +776,67 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, |
2852 | return -EFAULT; |
2853 | } |
2854 | |
2855 | - ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, |
2856 | - &shmid64.shm_perm, 0); |
2857 | - if (IS_ERR(ipcp)) |
2858 | - return PTR_ERR(ipcp); |
2859 | + down_write(&shm_ids(ns).rwsem); |
2860 | + rcu_read_lock(); |
2861 | + |
2862 | + ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, |
2863 | + &shmid64.shm_perm, 0); |
2864 | + if (IS_ERR(ipcp)) { |
2865 | + err = PTR_ERR(ipcp); |
2866 | + goto out_unlock1; |
2867 | + } |
2868 | |
2869 | shp = container_of(ipcp, struct shmid_kernel, shm_perm); |
2870 | |
2871 | err = security_shm_shmctl(shp, cmd); |
2872 | if (err) |
2873 | - goto out_unlock; |
2874 | + goto out_unlock1; |
2875 | + |
2876 | switch (cmd) { |
2877 | case IPC_RMID: |
2878 | + ipc_lock_object(&shp->shm_perm); |
2879 | + /* do_shm_rmid unlocks the ipc object and rcu */ |
2880 | do_shm_rmid(ns, ipcp); |
2881 | goto out_up; |
2882 | case IPC_SET: |
2883 | + ipc_lock_object(&shp->shm_perm); |
2884 | err = ipc_update_perm(&shmid64.shm_perm, ipcp); |
2885 | if (err) |
2886 | - goto out_unlock; |
2887 | + goto out_unlock0; |
2888 | shp->shm_ctim = get_seconds(); |
2889 | break; |
2890 | default: |
2891 | err = -EINVAL; |
2892 | + goto out_unlock1; |
2893 | } |
2894 | -out_unlock: |
2895 | - shm_unlock(shp); |
2896 | + |
2897 | +out_unlock0: |
2898 | + ipc_unlock_object(&shp->shm_perm); |
2899 | +out_unlock1: |
2900 | + rcu_read_unlock(); |
2901 | out_up: |
2902 | - up_write(&shm_ids(ns).rw_mutex); |
2903 | + up_write(&shm_ids(ns).rwsem); |
2904 | return err; |
2905 | } |
2906 | |
2907 | -SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
2908 | +static int shmctl_nolock(struct ipc_namespace *ns, int shmid, |
2909 | + int cmd, int version, void __user *buf) |
2910 | { |
2911 | + int err; |
2912 | struct shmid_kernel *shp; |
2913 | - int err, version; |
2914 | - struct ipc_namespace *ns; |
2915 | |
2916 | - if (cmd < 0 || shmid < 0) { |
2917 | - err = -EINVAL; |
2918 | - goto out; |
2919 | + /* preliminary security checks for *_INFO */ |
2920 | + if (cmd == IPC_INFO || cmd == SHM_INFO) { |
2921 | + err = security_shm_shmctl(NULL, cmd); |
2922 | + if (err) |
2923 | + return err; |
2924 | } |
2925 | |
2926 | - version = ipc_parse_version(&cmd); |
2927 | - ns = current->nsproxy->ipc_ns; |
2928 | - |
2929 | - switch (cmd) { /* replace with proc interface ? */ |
2930 | + switch (cmd) { |
2931 | case IPC_INFO: |
2932 | { |
2933 | struct shminfo64 shminfo; |
2934 | |
2935 | - err = security_shm_shmctl(NULL, cmd); |
2936 | - if (err) |
2937 | - return err; |
2938 | - |
2939 | memset(&shminfo, 0, sizeof(shminfo)); |
2940 | shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; |
2941 | shminfo.shmmax = ns->shm_ctlmax; |
2942 | @@ -816,9 +846,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
2943 | if(copy_shminfo_to_user (buf, &shminfo, version)) |
2944 | return -EFAULT; |
2945 | |
2946 | - down_read(&shm_ids(ns).rw_mutex); |
2947 | + down_read(&shm_ids(ns).rwsem); |
2948 | err = ipc_get_maxid(&shm_ids(ns)); |
2949 | - up_read(&shm_ids(ns).rw_mutex); |
2950 | + up_read(&shm_ids(ns).rwsem); |
2951 | |
2952 | if(err<0) |
2953 | err = 0; |
2954 | @@ -828,19 +858,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
2955 | { |
2956 | struct shm_info shm_info; |
2957 | |
2958 | - err = security_shm_shmctl(NULL, cmd); |
2959 | - if (err) |
2960 | - return err; |
2961 | - |
2962 | memset(&shm_info, 0, sizeof(shm_info)); |
2963 | - down_read(&shm_ids(ns).rw_mutex); |
2964 | + down_read(&shm_ids(ns).rwsem); |
2965 | shm_info.used_ids = shm_ids(ns).in_use; |
2966 | shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); |
2967 | shm_info.shm_tot = ns->shm_tot; |
2968 | shm_info.swap_attempts = 0; |
2969 | shm_info.swap_successes = 0; |
2970 | err = ipc_get_maxid(&shm_ids(ns)); |
2971 | - up_read(&shm_ids(ns).rw_mutex); |
2972 | + up_read(&shm_ids(ns).rwsem); |
2973 | if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { |
2974 | err = -EFAULT; |
2975 | goto out; |
2976 | @@ -855,27 +881,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
2977 | struct shmid64_ds tbuf; |
2978 | int result; |
2979 | |
2980 | + rcu_read_lock(); |
2981 | if (cmd == SHM_STAT) { |
2982 | - shp = shm_lock(ns, shmid); |
2983 | + shp = shm_obtain_object(ns, shmid); |
2984 | if (IS_ERR(shp)) { |
2985 | err = PTR_ERR(shp); |
2986 | - goto out; |
2987 | + goto out_unlock; |
2988 | } |
2989 | result = shp->shm_perm.id; |
2990 | } else { |
2991 | - shp = shm_lock_check(ns, shmid); |
2992 | + shp = shm_obtain_object_check(ns, shmid); |
2993 | if (IS_ERR(shp)) { |
2994 | err = PTR_ERR(shp); |
2995 | - goto out; |
2996 | + goto out_unlock; |
2997 | } |
2998 | result = 0; |
2999 | } |
3000 | + |
3001 | err = -EACCES; |
3002 | if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) |
3003 | goto out_unlock; |
3004 | + |
3005 | err = security_shm_shmctl(shp, cmd); |
3006 | if (err) |
3007 | goto out_unlock; |
3008 | + |
3009 | memset(&tbuf, 0, sizeof(tbuf)); |
3010 | kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); |
3011 | tbuf.shm_segsz = shp->shm_segsz; |
3012 | @@ -885,43 +915,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
3013 | tbuf.shm_cpid = shp->shm_cprid; |
3014 | tbuf.shm_lpid = shp->shm_lprid; |
3015 | tbuf.shm_nattch = shp->shm_nattch; |
3016 | - shm_unlock(shp); |
3017 | - if(copy_shmid_to_user (buf, &tbuf, version)) |
3018 | + rcu_read_unlock(); |
3019 | + |
3020 | + if (copy_shmid_to_user(buf, &tbuf, version)) |
3021 | err = -EFAULT; |
3022 | else |
3023 | err = result; |
3024 | goto out; |
3025 | } |
3026 | + default: |
3027 | + return -EINVAL; |
3028 | + } |
3029 | + |
3030 | +out_unlock: |
3031 | + rcu_read_unlock(); |
3032 | +out: |
3033 | + return err; |
3034 | +} |
3035 | + |
3036 | +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
3037 | +{ |
3038 | + struct shmid_kernel *shp; |
3039 | + int err, version; |
3040 | + struct ipc_namespace *ns; |
3041 | + |
3042 | + if (cmd < 0 || shmid < 0) |
3043 | + return -EINVAL; |
3044 | + |
3045 | + version = ipc_parse_version(&cmd); |
3046 | + ns = current->nsproxy->ipc_ns; |
3047 | + |
3048 | + switch (cmd) { |
3049 | + case IPC_INFO: |
3050 | + case SHM_INFO: |
3051 | + case SHM_STAT: |
3052 | + case IPC_STAT: |
3053 | + return shmctl_nolock(ns, shmid, cmd, version, buf); |
3054 | + case IPC_RMID: |
3055 | + case IPC_SET: |
3056 | + return shmctl_down(ns, shmid, cmd, buf, version); |
3057 | case SHM_LOCK: |
3058 | case SHM_UNLOCK: |
3059 | { |
3060 | struct file *shm_file; |
3061 | |
3062 | - shp = shm_lock_check(ns, shmid); |
3063 | + rcu_read_lock(); |
3064 | + shp = shm_obtain_object_check(ns, shmid); |
3065 | if (IS_ERR(shp)) { |
3066 | err = PTR_ERR(shp); |
3067 | - goto out; |
3068 | + goto out_unlock1; |
3069 | } |
3070 | |
3071 | audit_ipc_obj(&(shp->shm_perm)); |
3072 | + err = security_shm_shmctl(shp, cmd); |
3073 | + if (err) |
3074 | + goto out_unlock1; |
3075 | |
3076 | + ipc_lock_object(&shp->shm_perm); |
3077 | if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { |
3078 | kuid_t euid = current_euid(); |
3079 | err = -EPERM; |
3080 | if (!uid_eq(euid, shp->shm_perm.uid) && |
3081 | !uid_eq(euid, shp->shm_perm.cuid)) |
3082 | - goto out_unlock; |
3083 | + goto out_unlock0; |
3084 | if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) |
3085 | - goto out_unlock; |
3086 | + goto out_unlock0; |
3087 | } |
3088 | |
3089 | - err = security_shm_shmctl(shp, cmd); |
3090 | - if (err) |
3091 | - goto out_unlock; |
3092 | - |
3093 | shm_file = shp->shm_file; |
3094 | if (is_file_hugepages(shm_file)) |
3095 | - goto out_unlock; |
3096 | + goto out_unlock0; |
3097 | |
3098 | if (cmd == SHM_LOCK) { |
3099 | struct user_struct *user = current_user(); |
3100 | @@ -930,32 +993,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) |
3101 | shp->shm_perm.mode |= SHM_LOCKED; |
3102 | shp->mlock_user = user; |
3103 | } |
3104 | - goto out_unlock; |
3105 | + goto out_unlock0; |
3106 | } |
3107 | |
3108 | /* SHM_UNLOCK */ |
3109 | if (!(shp->shm_perm.mode & SHM_LOCKED)) |
3110 | - goto out_unlock; |
3111 | + goto out_unlock0; |
3112 | shmem_lock(shm_file, 0, shp->mlock_user); |
3113 | shp->shm_perm.mode &= ~SHM_LOCKED; |
3114 | shp->mlock_user = NULL; |
3115 | get_file(shm_file); |
3116 | - shm_unlock(shp); |
3117 | + ipc_unlock_object(&shp->shm_perm); |
3118 | + rcu_read_unlock(); |
3119 | shmem_unlock_mapping(shm_file->f_mapping); |
3120 | + |
3121 | fput(shm_file); |
3122 | - goto out; |
3123 | - } |
3124 | - case IPC_RMID: |
3125 | - case IPC_SET: |
3126 | - err = shmctl_down(ns, shmid, cmd, buf, version); |
3127 | return err; |
3128 | + } |
3129 | default: |
3130 | return -EINVAL; |
3131 | } |
3132 | |
3133 | -out_unlock: |
3134 | - shm_unlock(shp); |
3135 | -out: |
3136 | +out_unlock0: |
3137 | + ipc_unlock_object(&shp->shm_perm); |
3138 | +out_unlock1: |
3139 | + rcu_read_unlock(); |
3140 | return err; |
3141 | } |
3142 | |
3143 | @@ -1023,10 +1085,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, |
3144 | * additional creator id... |
3145 | */ |
3146 | ns = current->nsproxy->ipc_ns; |
3147 | - shp = shm_lock_check(ns, shmid); |
3148 | + rcu_read_lock(); |
3149 | + shp = shm_obtain_object_check(ns, shmid); |
3150 | if (IS_ERR(shp)) { |
3151 | err = PTR_ERR(shp); |
3152 | - goto out; |
3153 | + goto out_unlock; |
3154 | } |
3155 | |
3156 | err = -EACCES; |
3157 | @@ -1037,24 +1100,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, |
3158 | if (err) |
3159 | goto out_unlock; |
3160 | |
3161 | + ipc_lock_object(&shp->shm_perm); |
3162 | path = shp->shm_file->f_path; |
3163 | path_get(&path); |
3164 | shp->shm_nattch++; |
3165 | size = i_size_read(path.dentry->d_inode); |
3166 | - shm_unlock(shp); |
3167 | + ipc_unlock_object(&shp->shm_perm); |
3168 | + rcu_read_unlock(); |
3169 | |
3170 | err = -ENOMEM; |
3171 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); |
3172 | - if (!sfd) |
3173 | - goto out_put_dentry; |
3174 | + if (!sfd) { |
3175 | + path_put(&path); |
3176 | + goto out_nattch; |
3177 | + } |
3178 | |
3179 | file = alloc_file(&path, f_mode, |
3180 | is_file_hugepages(shp->shm_file) ? |
3181 | &shm_file_operations_huge : |
3182 | &shm_file_operations); |
3183 | err = PTR_ERR(file); |
3184 | - if (IS_ERR(file)) |
3185 | - goto out_free; |
3186 | + if (IS_ERR(file)) { |
3187 | + kfree(sfd); |
3188 | + path_put(&path); |
3189 | + goto out_nattch; |
3190 | + } |
3191 | |
3192 | file->private_data = sfd; |
3193 | file->f_mapping = shp->shm_file->f_mapping; |
3194 | @@ -1080,7 +1150,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, |
3195 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) |
3196 | goto invalid; |
3197 | } |
3198 | - |
3199 | + |
3200 | addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); |
3201 | *raddr = addr; |
3202 | err = 0; |
3203 | @@ -1095,7 +1165,7 @@ out_fput: |
3204 | fput(file); |
3205 | |
3206 | out_nattch: |
3207 | - down_write(&shm_ids(ns).rw_mutex); |
3208 | + down_write(&shm_ids(ns).rwsem); |
3209 | shp = shm_lock(ns, shmid); |
3210 | BUG_ON(IS_ERR(shp)); |
3211 | shp->shm_nattch--; |
3212 | @@ -1103,20 +1173,13 @@ out_nattch: |
3213 | shm_destroy(ns, shp); |
3214 | else |
3215 | shm_unlock(shp); |
3216 | - up_write(&shm_ids(ns).rw_mutex); |
3217 | - |
3218 | -out: |
3219 | + up_write(&shm_ids(ns).rwsem); |
3220 | return err; |
3221 | |
3222 | out_unlock: |
3223 | - shm_unlock(shp); |
3224 | - goto out; |
3225 | - |
3226 | -out_free: |
3227 | - kfree(sfd); |
3228 | -out_put_dentry: |
3229 | - path_put(&path); |
3230 | - goto out_nattch; |
3231 | + rcu_read_unlock(); |
3232 | +out: |
3233 | + return err; |
3234 | } |
3235 | |
3236 | SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) |
3237 | @@ -1221,8 +1284,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) |
3238 | #else /* CONFIG_MMU */ |
3239 | /* under NOMMU conditions, the exact address to be destroyed must be |
3240 | * given */ |
3241 | - retval = -EINVAL; |
3242 | - if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { |
3243 | + if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { |
3244 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
3245 | retval = 0; |
3246 | } |
3247 | diff --git a/ipc/util.c b/ipc/util.c |
3248 | index 809ec5e..fdb8ae7 100644 |
3249 | --- a/ipc/util.c |
3250 | +++ b/ipc/util.c |
3251 | @@ -15,6 +15,14 @@ |
3252 | * Jun 2006 - namespaces ssupport |
3253 | * OpenVZ, SWsoft Inc. |
3254 | * Pavel Emelianov <xemul@openvz.org> |
3255 | + * |
3256 | + * General sysv ipc locking scheme: |
3257 | + * when doing ipc id lookups, take the ids->rwsem |
3258 | + * rcu_read_lock() |
3259 | + * obtain the ipc object (kern_ipc_perm) |
3260 | + * perform security, capabilities, auditing and permission checks, etc. |
3261 | + * acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object() |
3262 | + * perform data updates (ie: SET, RMID, LOCK/UNLOCK commands) |
3263 | */ |
3264 | |
3265 | #include <linux/mm.h> |
3266 | @@ -119,7 +127,7 @@ __initcall(ipc_init); |
3267 | |
3268 | void ipc_init_ids(struct ipc_ids *ids) |
3269 | { |
3270 | - init_rwsem(&ids->rw_mutex); |
3271 | + init_rwsem(&ids->rwsem); |
3272 | |
3273 | ids->in_use = 0; |
3274 | ids->seq = 0; |
3275 | @@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, |
3276 | * @ids: Identifier set |
3277 | * @key: The key to find |
3278 | * |
3279 | - * Requires ipc_ids.rw_mutex locked. |
3280 | + * Requires ipc_ids.rwsem locked. |
3281 | * Returns the LOCKED pointer to the ipc structure if found or NULL |
3282 | * if not. |
3283 | * If key is found ipc points to the owning ipc structure |
3284 | @@ -197,7 +205,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) |
3285 | continue; |
3286 | } |
3287 | |
3288 | - ipc_lock_by_ptr(ipc); |
3289 | + rcu_read_lock(); |
3290 | + ipc_lock_object(ipc); |
3291 | return ipc; |
3292 | } |
3293 | |
3294 | @@ -208,7 +217,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) |
3295 | * ipc_get_maxid - get the last assigned id |
3296 | * @ids: IPC identifier set |
3297 | * |
3298 | - * Called with ipc_ids.rw_mutex held. |
3299 | + * Called with ipc_ids.rwsem held. |
3300 | */ |
3301 | |
3302 | int ipc_get_maxid(struct ipc_ids *ids) |
3303 | @@ -246,9 +255,8 @@ int ipc_get_maxid(struct ipc_ids *ids) |
3304 | * is returned. The 'new' entry is returned in a locked state on success. |
3305 | * On failure the entry is not locked and a negative err-code is returned. |
3306 | * |
3307 | - * Called with ipc_ids.rw_mutex held as a writer. |
3308 | + * Called with writer ipc_ids.rwsem held. |
3309 | */ |
3310 | - |
3311 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) |
3312 | { |
3313 | kuid_t euid; |
3314 | @@ -313,9 +321,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, |
3315 | { |
3316 | int err; |
3317 | |
3318 | - down_write(&ids->rw_mutex); |
3319 | + down_write(&ids->rwsem); |
3320 | err = ops->getnew(ns, params); |
3321 | - up_write(&ids->rw_mutex); |
3322 | + up_write(&ids->rwsem); |
3323 | return err; |
3324 | } |
3325 | |
3326 | @@ -332,7 +340,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, |
3327 | * |
3328 | * On success, the IPC id is returned. |
3329 | * |
3330 | - * It is called with ipc_ids.rw_mutex and ipcp->lock held. |
3331 | + * It is called with ipc_ids.rwsem and ipcp->lock held. |
3332 | */ |
3333 | static int ipc_check_perms(struct ipc_namespace *ns, |
3334 | struct kern_ipc_perm *ipcp, |
3335 | @@ -377,7 +385,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, |
3336 | * Take the lock as a writer since we are potentially going to add |
3337 | * a new entry + read locks are not "upgradable" |
3338 | */ |
3339 | - down_write(&ids->rw_mutex); |
3340 | + down_write(&ids->rwsem); |
3341 | ipcp = ipc_findkey(ids, params->key); |
3342 | if (ipcp == NULL) { |
3343 | /* key not used */ |
3344 | @@ -403,7 +411,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, |
3345 | } |
3346 | ipc_unlock(ipcp); |
3347 | } |
3348 | - up_write(&ids->rw_mutex); |
3349 | + up_write(&ids->rwsem); |
3350 | |
3351 | return err; |
3352 | } |
3353 | @@ -414,7 +422,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, |
3354 | * @ids: IPC identifier set |
3355 | * @ipcp: ipc perm structure containing the identifier to remove |
3356 | * |
3357 | - * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held |
3358 | + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held |
3359 | * before this function is called, and remain locked on the exit. |
3360 | */ |
3361 | |
3362 | @@ -466,13 +474,6 @@ void ipc_free(void* ptr, int size) |
3363 | kfree(ptr); |
3364 | } |
3365 | |
3366 | -struct ipc_rcu { |
3367 | - struct rcu_head rcu; |
3368 | - atomic_t refcount; |
3369 | - /* "void *" makes sure alignment of following data is sane. */ |
3370 | - void *data[0]; |
3371 | -}; |
3372 | - |
3373 | /** |
3374 | * ipc_rcu_alloc - allocate ipc and rcu space |
3375 | * @size: size desired |
3376 | @@ -489,35 +490,34 @@ void *ipc_rcu_alloc(int size) |
3377 | if (unlikely(!out)) |
3378 | return NULL; |
3379 | atomic_set(&out->refcount, 1); |
3380 | - return out->data; |
3381 | + return out + 1; |
3382 | } |
3383 | |
3384 | int ipc_rcu_getref(void *ptr) |
3385 | { |
3386 | - return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); |
3387 | -} |
3388 | + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; |
3389 | |
3390 | -/** |
3391 | - * ipc_schedule_free - free ipc + rcu space |
3392 | - * @head: RCU callback structure for queued work |
3393 | - */ |
3394 | -static void ipc_schedule_free(struct rcu_head *head) |
3395 | -{ |
3396 | - vfree(container_of(head, struct ipc_rcu, rcu)); |
3397 | + return atomic_inc_not_zero(&p->refcount); |
3398 | } |
3399 | |
3400 | -void ipc_rcu_putref(void *ptr) |
3401 | +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) |
3402 | { |
3403 | - struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); |
3404 | + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; |
3405 | |
3406 | if (!atomic_dec_and_test(&p->refcount)) |
3407 | return; |
3408 | |
3409 | - if (is_vmalloc_addr(ptr)) { |
3410 | - call_rcu(&p->rcu, ipc_schedule_free); |
3411 | - } else { |
3412 | - kfree_rcu(p, rcu); |
3413 | - } |
3414 | + call_rcu(&p->rcu, func); |
3415 | +} |
3416 | + |
3417 | +void ipc_rcu_free(struct rcu_head *head) |
3418 | +{ |
3419 | + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); |
3420 | + |
3421 | + if (is_vmalloc_addr(p)) |
3422 | + vfree(p); |
3423 | + else |
3424 | + kfree(p); |
3425 | } |
3426 | |
3427 | /** |
3428 | @@ -622,7 +622,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) |
3429 | } |
3430 | |
3431 | /** |
3432 | - * ipc_lock - Lock an ipc structure without rw_mutex held |
3433 | + * ipc_lock - Lock an ipc structure without rwsem held |
3434 | * @ids: IPC identifier set |
3435 | * @id: ipc id to look for |
3436 | * |
3437 | @@ -678,22 +678,6 @@ out: |
3438 | return out; |
3439 | } |
3440 | |
3441 | -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) |
3442 | -{ |
3443 | - struct kern_ipc_perm *out; |
3444 | - |
3445 | - out = ipc_lock(ids, id); |
3446 | - if (IS_ERR(out)) |
3447 | - return out; |
3448 | - |
3449 | - if (ipc_checkid(out, id)) { |
3450 | - ipc_unlock(out); |
3451 | - return ERR_PTR(-EIDRM); |
3452 | - } |
3453 | - |
3454 | - return out; |
3455 | -} |
3456 | - |
3457 | /** |
3458 | * ipcget - Common sys_*get() code |
3459 | * @ns : namsepace |
3460 | @@ -734,7 +718,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) |
3461 | } |
3462 | |
3463 | /** |
3464 | - * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd |
3465 | + * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd |
3466 | * @ns: the ipc namespace |
3467 | * @ids: the table of ids where to look for the ipc |
3468 | * @id: the id of the ipc to retrieve |
3469 | @@ -747,39 +731,22 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) |
3470 | * It must be called without any lock held and |
3471 | * - retrieves the ipc with the given id in the given table. |
3472 | * - performs some audit and permission check, depending on the given cmd |
3473 | - * - returns the ipc with both ipc and rw_mutex locks held in case of success |
3474 | - * or an err-code without any lock held otherwise. |
3475 | + * - returns a pointer to the ipc object or otherwise, the corresponding error. |
3476 | + * |
3477 | + * Call holding the both the rwsem and the rcu read lock. |
3478 | */ |
3479 | -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, |
3480 | - struct ipc_ids *ids, int id, int cmd, |
3481 | - struct ipc64_perm *perm, int extra_perm) |
3482 | -{ |
3483 | - struct kern_ipc_perm *ipcp; |
3484 | - |
3485 | - ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); |
3486 | - if (IS_ERR(ipcp)) |
3487 | - goto out; |
3488 | - |
3489 | - spin_lock(&ipcp->lock); |
3490 | -out: |
3491 | - return ipcp; |
3492 | -} |
3493 | - |
3494 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
3495 | - struct ipc_ids *ids, int id, int cmd, |
3496 | - struct ipc64_perm *perm, int extra_perm) |
3497 | + struct ipc_ids *ids, int id, int cmd, |
3498 | + struct ipc64_perm *perm, int extra_perm) |
3499 | { |
3500 | kuid_t euid; |
3501 | int err = -EPERM; |
3502 | struct kern_ipc_perm *ipcp; |
3503 | |
3504 | - down_write(&ids->rw_mutex); |
3505 | - rcu_read_lock(); |
3506 | - |
3507 | ipcp = ipc_obtain_object_check(ids, id); |
3508 | if (IS_ERR(ipcp)) { |
3509 | err = PTR_ERR(ipcp); |
3510 | - goto out_up; |
3511 | + goto err; |
3512 | } |
3513 | |
3514 | audit_ipc_obj(ipcp); |
3515 | @@ -790,16 +757,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
3516 | euid = current_euid(); |
3517 | if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || |
3518 | ns_capable(ns->user_ns, CAP_SYS_ADMIN)) |
3519 | - return ipcp; |
3520 | - |
3521 | -out_up: |
3522 | - /* |
3523 | - * Unsuccessful lookup, unlock and return |
3524 | - * the corresponding error. |
3525 | - */ |
3526 | - rcu_read_unlock(); |
3527 | - up_write(&ids->rw_mutex); |
3528 | - |
3529 | + return ipcp; /* successful lookup */ |
3530 | +err: |
3531 | return ERR_PTR(err); |
3532 | } |
3533 | |
3534 | @@ -856,7 +815,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, |
3535 | ipc = idr_find(&ids->ipcs_idr, pos); |
3536 | if (ipc != NULL) { |
3537 | *new_pos = pos + 1; |
3538 | - ipc_lock_by_ptr(ipc); |
3539 | + rcu_read_lock(); |
3540 | + ipc_lock_object(ipc); |
3541 | return ipc; |
3542 | } |
3543 | } |
3544 | @@ -894,7 +854,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) |
3545 | * Take the lock - this will be released by the corresponding |
3546 | * call to stop(). |
3547 | */ |
3548 | - down_read(&ids->rw_mutex); |
3549 | + down_read(&ids->rwsem); |
3550 | |
3551 | /* pos < 0 is invalid */ |
3552 | if (*pos < 0) |
3553 | @@ -921,7 +881,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) |
3554 | |
3555 | ids = &iter->ns->ids[iface->ids]; |
3556 | /* Release the lock we took in start() */ |
3557 | - up_read(&ids->rw_mutex); |
3558 | + up_read(&ids->rwsem); |
3559 | } |
3560 | |
3561 | static int sysvipc_proc_show(struct seq_file *s, void *it) |
3562 | diff --git a/ipc/util.h b/ipc/util.h |
3563 | index 2b0bdd5..f2f5036 100644 |
3564 | --- a/ipc/util.h |
3565 | +++ b/ipc/util.h |
3566 | @@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { } |
3567 | static inline void shm_exit_ns(struct ipc_namespace *ns) { } |
3568 | #endif |
3569 | |
3570 | +struct ipc_rcu { |
3571 | + struct rcu_head rcu; |
3572 | + atomic_t refcount; |
3573 | +} ____cacheline_aligned_in_smp; |
3574 | + |
3575 | +#define ipc_rcu_to_struct(p) ((void *)(p+1)) |
3576 | + |
3577 | /* |
3578 | * Structure that holds the parameters needed by the ipc operations |
3579 | * (see after) |
3580 | @@ -94,10 +101,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header, |
3581 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) |
3582 | #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) |
3583 | |
3584 | -/* must be called with ids->rw_mutex acquired for writing */ |
3585 | +/* must be called with ids->rwsem acquired for writing */ |
3586 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); |
3587 | |
3588 | -/* must be called with ids->rw_mutex acquired for reading */ |
3589 | +/* must be called with ids->rwsem acquired for reading */ |
3590 | int ipc_get_maxid(struct ipc_ids *); |
3591 | |
3592 | /* must be called with both locks acquired. */ |
3593 | @@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); |
3594 | */ |
3595 | void* ipc_rcu_alloc(int size); |
3596 | int ipc_rcu_getref(void *ptr); |
3597 | -void ipc_rcu_putref(void *ptr); |
3598 | +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); |
3599 | +void ipc_rcu_free(struct rcu_head *head); |
3600 | |
3601 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); |
3602 | struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); |
3603 | @@ -131,9 +139,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); |
3604 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
3605 | struct ipc_ids *ids, int id, int cmd, |
3606 | struct ipc64_perm *perm, int extra_perm); |
3607 | -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, |
3608 | - struct ipc_ids *ids, int id, int cmd, |
3609 | - struct ipc64_perm *perm, int extra_perm); |
3610 | |
3611 | #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION |
3612 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ |
3613 | @@ -159,24 +164,27 @@ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) |
3614 | return uid / SEQ_MULTIPLIER != ipcp->seq; |
3615 | } |
3616 | |
3617 | -static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) |
3618 | +static inline void ipc_lock_object(struct kern_ipc_perm *perm) |
3619 | { |
3620 | - rcu_read_lock(); |
3621 | spin_lock(&perm->lock); |
3622 | } |
3623 | |
3624 | -static inline void ipc_unlock(struct kern_ipc_perm *perm) |
3625 | +static inline void ipc_unlock_object(struct kern_ipc_perm *perm) |
3626 | { |
3627 | spin_unlock(&perm->lock); |
3628 | - rcu_read_unlock(); |
3629 | } |
3630 | |
3631 | -static inline void ipc_lock_object(struct kern_ipc_perm *perm) |
3632 | +static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) |
3633 | { |
3634 | - spin_lock(&perm->lock); |
3635 | + assert_spin_locked(&perm->lock); |
3636 | +} |
3637 | + |
3638 | +static inline void ipc_unlock(struct kern_ipc_perm *perm) |
3639 | +{ |
3640 | + ipc_unlock_object(perm); |
3641 | + rcu_read_unlock(); |
3642 | } |
3643 | |
3644 | -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); |
3645 | struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); |
3646 | int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, |
3647 | struct ipc_ops *ops, struct ipc_params *params); |
3648 | diff --git a/mm/shmem.c b/mm/shmem.c |
3649 | index 5e6a842..509b393 100644 |
3650 | --- a/mm/shmem.c |
3651 | +++ b/mm/shmem.c |
3652 | @@ -2879,14 +2879,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); |
3653 | |
3654 | /* common code */ |
3655 | |
3656 | -static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen) |
3657 | -{ |
3658 | - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", |
3659 | - dentry->d_name.name); |
3660 | -} |
3661 | - |
3662 | static struct dentry_operations anon_ops = { |
3663 | - .d_dname = shmem_dname |
3664 | + .d_dname = simple_dname |
3665 | }; |
3666 | |
3667 | /** |
3668 | diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c |
3669 | index b5375ed..aecf088 100644 |
3670 | --- a/sound/pci/hda/patch_hdmi.c |
3671 | +++ b/sound/pci/hda/patch_hdmi.c |
3672 | @@ -930,6 +930,14 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, |
3673 | } |
3674 | |
3675 | /* |
3676 | + * always configure channel mapping, it may have been changed by the |
3677 | + * user in the meantime |
3678 | + */ |
3679 | + hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, |
3680 | + channels, per_pin->chmap, |
3681 | + per_pin->chmap_set); |
3682 | + |
3683 | + /* |
3684 | * sizeof(ai) is used instead of sizeof(*hdmi_ai) or |
3685 | * sizeof(*dp_ai) to avoid partial match/update problems when |
3686 | * the user switches between HDMI/DP monitors. |
3687 | @@ -940,20 +948,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, |
3688 | "pin=%d channels=%d\n", |
3689 | pin_nid, |
3690 | channels); |
3691 | - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, |
3692 | - channels, per_pin->chmap, |
3693 | - per_pin->chmap_set); |
3694 | hdmi_stop_infoframe_trans(codec, pin_nid); |
3695 | hdmi_fill_audio_infoframe(codec, pin_nid, |
3696 | ai.bytes, sizeof(ai)); |
3697 | hdmi_start_infoframe_trans(codec, pin_nid); |
3698 | - } else { |
3699 | - /* For non-pcm audio switch, setup new channel mapping |
3700 | - * accordingly */ |
3701 | - if (per_pin->non_pcm != non_pcm) |
3702 | - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, |
3703 | - channels, per_pin->chmap, |
3704 | - per_pin->chmap_set); |
3705 | } |
3706 | |
3707 | per_pin->non_pcm = non_pcm; |
3708 | diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c |
3709 | index 458cf89..21b6649 100644 |
3710 | --- a/sound/pci/hda/patch_realtek.c |
3711 | +++ b/sound/pci/hda/patch_realtek.c |
3712 | @@ -3200,6 +3200,15 @@ static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec, |
3713 | } |
3714 | } |
3715 | |
3716 | +static void alc290_fixup_mono_speakers(struct hda_codec *codec, |
3717 | + const struct hda_fixup *fix, int action) |
3718 | +{ |
3719 | + if (action == HDA_FIXUP_ACT_PRE_PROBE) |
3720 | + /* Remove DAC node 0x03, as it seems to be |
3721 | + giving mono output */ |
3722 | + snd_hda_override_wcaps(codec, 0x03, 0); |
3723 | +} |
3724 | + |
3725 | enum { |
3726 | ALC269_FIXUP_SONY_VAIO, |
3727 | ALC275_FIXUP_SONY_VAIO_GPIO2, |
3728 | @@ -3223,9 +3232,12 @@ enum { |
3729 | ALC269_FIXUP_HP_GPIO_LED, |
3730 | ALC269_FIXUP_INV_DMIC, |
3731 | ALC269_FIXUP_LENOVO_DOCK, |
3732 | + ALC286_FIXUP_SONY_MIC_NO_PRESENCE, |
3733 | ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, |
3734 | ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, |
3735 | ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, |
3736 | + ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, |
3737 | + ALC290_FIXUP_MONO_SPEAKERS, |
3738 | ALC269_FIXUP_HEADSET_MODE, |
3739 | ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, |
3740 | ALC269_FIXUP_ASUS_X101_FUNC, |
3741 | @@ -3412,6 +3424,15 @@ static const struct hda_fixup alc269_fixups[] = { |
3742 | .chained = true, |
3743 | .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC |
3744 | }, |
3745 | + [ALC269_FIXUP_DELL3_MIC_NO_PRESENCE] = { |
3746 | + .type = HDA_FIXUP_PINS, |
3747 | + .v.pins = (const struct hda_pintbl[]) { |
3748 | + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */ |
3749 | + { } |
3750 | + }, |
3751 | + .chained = true, |
3752 | + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC |
3753 | + }, |
3754 | [ALC269_FIXUP_HEADSET_MODE] = { |
3755 | .type = HDA_FIXUP_FUNC, |
3756 | .v.func = alc_fixup_headset_mode, |
3757 | @@ -3420,6 +3441,13 @@ static const struct hda_fixup alc269_fixups[] = { |
3758 | .type = HDA_FIXUP_FUNC, |
3759 | .v.func = alc_fixup_headset_mode_no_hp_mic, |
3760 | }, |
3761 | + [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { |
3762 | + .type = HDA_FIXUP_PINS, |
3763 | + .v.pins = (const struct hda_pintbl[]) { |
3764 | + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ |
3765 | + { } |
3766 | + }, |
3767 | + }, |
3768 | [ALC269_FIXUP_ASUS_X101_FUNC] = { |
3769 | .type = HDA_FIXUP_FUNC, |
3770 | .v.func = alc269_fixup_x101_headset_mic, |
3771 | @@ -3477,6 +3505,12 @@ static const struct hda_fixup alc269_fixups[] = { |
3772 | .type = HDA_FIXUP_FUNC, |
3773 | .v.func = alc269_fixup_limit_int_mic_boost, |
3774 | }, |
3775 | + [ALC290_FIXUP_MONO_SPEAKERS] = { |
3776 | + .type = HDA_FIXUP_FUNC, |
3777 | + .v.func = alc290_fixup_mono_speakers, |
3778 | + .chained = true, |
3779 | + .chain_id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, |
3780 | + }, |
3781 | }; |
3782 | |
3783 | static const struct snd_pci_quirk alc269_fixup_tbl[] = { |
3784 | @@ -3511,6 +3545,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { |
3785 | SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), |
3786 | SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), |
3787 | SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), |
3788 | + SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), |
3789 | SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), |
3790 | SND_PCI_QUIRK(0x103c, 0x18e6, "HP", ALC269_FIXUP_HP_GPIO_LED), |
3791 | SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), |
3792 | @@ -3529,6 +3564,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { |
3793 | SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), |
3794 | SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), |
3795 | SND_PCI_QUIRK(0x1043, 0x8516, "ASUS X101CH", ALC269_FIXUP_ASUS_X101), |
3796 | + SND_PCI_QUIRK(0x104d, 0x90b6, "Sony VAIO Pro 13", ALC286_FIXUP_SONY_MIC_NO_PRESENCE), |
3797 | SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIXUP_SONY_VAIO_GPIO2), |
3798 | SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), |
3799 | SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), |
3800 | @@ -4216,6 +4252,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { |
3801 | SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), |
3802 | SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), |
3803 | SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), |
3804 | + SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), |
3805 | SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), |
3806 | SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), |
3807 | SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), |
3808 | diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c |
3809 | index 0ce90337..cd69a80 100644 |
3810 | --- a/sound/usb/usx2y/usbusx2yaudio.c |
3811 | +++ b/sound/usb/usx2y/usbusx2yaudio.c |
3812 | @@ -299,19 +299,6 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y, |
3813 | usX2Y_clients_stop(usX2Y); |
3814 | } |
3815 | |
3816 | -static void usX2Y_error_sequence(struct usX2Ydev *usX2Y, |
3817 | - struct snd_usX2Y_substream *subs, struct urb *urb) |
3818 | -{ |
3819 | - snd_printk(KERN_ERR |
3820 | -"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n" |
3821 | -"Most probably some urb of usb-frame %i is still missing.\n" |
3822 | -"Cause could be too long delays in usb-hcd interrupt handling.\n", |
3823 | - usb_get_current_frame_number(usX2Y->dev), |
3824 | - subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out", |
3825 | - usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame); |
3826 | - usX2Y_clients_stop(usX2Y); |
3827 | -} |
3828 | - |
3829 | static void i_usX2Y_urb_complete(struct urb *urb) |
3830 | { |
3831 | struct snd_usX2Y_substream *subs = urb->context; |
3832 | @@ -328,12 +315,9 @@ static void i_usX2Y_urb_complete(struct urb *urb) |
3833 | usX2Y_error_urb_status(usX2Y, subs, urb); |
3834 | return; |
3835 | } |
3836 | - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) |
3837 | - subs->completed_urb = urb; |
3838 | - else { |
3839 | - usX2Y_error_sequence(usX2Y, subs, urb); |
3840 | - return; |
3841 | - } |
3842 | + |
3843 | + subs->completed_urb = urb; |
3844 | + |
3845 | { |
3846 | struct snd_usX2Y_substream *capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE], |
3847 | *playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK]; |
3848 | diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c |
3849 | index f2a1acd..814d0e8 100644 |
3850 | --- a/sound/usb/usx2y/usx2yhwdeppcm.c |
3851 | +++ b/sound/usb/usx2y/usx2yhwdeppcm.c |
3852 | @@ -244,13 +244,8 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb) |
3853 | usX2Y_error_urb_status(usX2Y, subs, urb); |
3854 | return; |
3855 | } |
3856 | - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) |
3857 | - subs->completed_urb = urb; |
3858 | - else { |
3859 | - usX2Y_error_sequence(usX2Y, subs, urb); |
3860 | - return; |
3861 | - } |
3862 | |
3863 | + subs->completed_urb = urb; |
3864 | capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE]; |
3865 | capsubs2 = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE + 2]; |
3866 | playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK]; |