diff --git a/Makefile b/Makefile index e9528d2..5c7d3d6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = TOSSUG Baby Fish diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 442ce5d..43de302 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -53,11 +53,10 @@ static inline void __udelay(unsigned long usecs) { unsigned long loops; - /* (long long) cast ensures 64 bit MPY - real or emulated + /* (u64) cast ensures 64 bit MPY - real or emulated * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops */ - loops = ((long long)(usecs * 4295 * HZ) * - (long long)(loops_per_jiffy)) >> 32; + loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; __delay(loops); } diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h index 6fc1159..764f1e3 100644 --- a/arch/arc/include/asm/sections.h +++ b/arch/arc/include/asm/sections.h @@ -11,7 +11,6 @@ #include -extern char _int_vec_base_lds[]; extern char __arc_dccm_base[]; extern char __dtb_start[]; diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index f158197..b6a8c2d 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (tmp) + : "r"(&(lock->slock)) + : "memory"); + smp_mb(); } diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 3242082..30c9baf 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -43,7 +43,7 @@ * Because it essentially checks if buffer end is within limit and @len is * non-ngeative, which implies that buffer start will be within limit too. * - * The reason for rewriting being, for majorit yof cases, @len is generally + * The reason for rewriting being, for majority of cases, @len is generally * compile time constant, causing first sub-expression to be compile time * subsumed. * @@ -53,7 +53,7 @@ * */ #define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ - (((addr)+(sz)) <= get_fs())) + ((addr) <= (get_fs() - (sz)))) #define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ likely(__user_ok((addr), (sz)))) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 006dec3..0f944f0 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -27,11 +27,16 @@ stext: ; Don't clobber r0-r4 yet. It might have bootloader provided info ;------------------------------------------------------------------- + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + #ifdef CONFIG_SMP ; Only Boot (Master) proceeds. Others wait in platform dependent way ; IDENTITY Reg [ 3 2 1 0 ] ; (cpu-id) ^^^ => Zero for UP ARC700 ; => #Core-ID if SMP (Master 0) + ; Note that non-boot CPUs might not land here if halt-on-reset and + ; instead breath life from @first_lines_of_secondary, but we still + ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 jnz arc_platform_smp_wait_to_boot @@ -96,6 +101,8 @@ stext: first_lines_of_secondary: + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] SET_CURR_TASK_ON_CPU r0, r1 diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 8115fa5..a199471 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -24,7 +24,6 @@ * -Needed for each CPU (hence not foldable into init_IRQ) * * what it does ? - * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000 * -Disable all IRQs (on CPU side) * -Optionally, setup the High priority Interrupts as Level 2 IRQs */ @@ -32,8 +31,6 @@ void __cpuinit arc_init_IRQ(void) { int level_mask = 0; - write_aux_reg(AUX_INTR_VEC_BASE, _int_vec_base_lds); - /* Disable all IRQs: enable them as devices request */ write_aux_reg(AUX_IENABLE, 0); diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index c6a81c5..0851604 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -92,7 +92,7 @@ static int genregs_set(struct task_struct *target, REG_IN_CHUNK(scratch, callee, ptregs); /* pt_regs[bta..orig_r8] */ REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ REG_IGNORE_ONE(efa); /* efa update invalid */ - REG_IN_ONE(stop_pc, &ptregs->ret); /* stop_pc: PC update */ + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ return ret; } diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b2b3731..2d7786b 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -47,10 +47,7 @@ void __cpuinit read_arc_build_cfg_regs(void) READ_BCR(AUX_IDENTITY, cpu->core); cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); - cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); - if (cpu->vec_base == 0) - cpu->vec_base = (unsigned int)_int_vec_base_lds; READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); cpu->uncached_base = uncached_space.start << 24; diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index ee6ef2f..7e95e1a 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct rt_sigframe __user *sf; unsigned int magic; - int err; struct pt_regs *regs = current_pt_regs(); /* Always make any pending restarted system calls return -EINTR */ @@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) goto badframe; - err = restore_usr_regs(regs, sf); - err |= __get_user(magic, &sf->sigret_magic); - if (err) + if (__get_user(magic, &sf->sigret_magic)) goto badframe; if (unlikely(is_do_ss_needed(magic))) if (restore_altstack(&sf->uc.uc_stack)) goto badframe; + if (restore_usr_regs(regs, sf)) + goto badframe; + /* Don't restart from sigreturn */ syscall_wont_restart(regs); @@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, return 1; /* + * w/o SA_SIGINFO, struct ucontext is partially populated (only + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation + * during signal handler execution. This works for SA_SIGINFO as well + * although the semantics are now overloaded (the same reg state can be + * inspected by userland: but are they allowed to fiddle with it ? + */ + err |= stash_usr_regs(sf, regs, set); + + /* * SA_SIGINFO requires 3 args to signal handler: * #1: sig-no (common to any handler) * #2: struct siginfo @@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info, magic = MAGIC_SIGALTSTK; } - /* - * w/o SA_SIGINFO, struct ucontext is partially populated (only - * uc_mcontext/uc_sigmask) for kernel's normal user state preservation - * during signal handler execution. This works for SA_SIGINFO as well - * although the semantics are now overloaded (the same reg state can be - * inspected by userland: but are they allowed to fiddle with it ? - */ - err |= stash_usr_regs(sf, regs, set); err |= __put_user(magic, &sf->sigret_magic); if (err) return err; diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 4cd8163..116d3e0 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -233,6 +233,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; + + /* handle zero-overhead-loop */ + if ((regs->ret == regs->lp_end) && (regs->lp_count)) { + regs->ret = regs->lp_start; + regs->lp_count--; + } } return 0; diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index bfc198c..863c892 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -16,7 +16,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" JUMP_LABEL_NOP "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 5bc2615..ab1fe3b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -466,7 +466,7 @@ int in_gate_area_no_mm(unsigned long addr) { return in_gate_area(NULL, addr); } -#define is_gate_vma(vma) ((vma) = &gate_vma) +#define is_gate_vma(vma) ((vma) == &gate_vma) #else #define is_gate_vma(vma) 0 #endif diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 4d6d77e..e194f95 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\tnop\n\t" + asm_volatile_goto("1:\tnop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 04e47c6..b3f87a3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -805,14 +805,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs) else { /* - * The kernel should never fault on its own address space. + * The kernel should never fault on its own address space, + * unless pagefault_disable() was called before. */ - if (fault_space == 0) + if (fault_space == 0 && !in_atomic()) { pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); - } } diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index ae098c4..f016bb6 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -19,7 +19,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b02f91e..7bcd4d6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1054,7 +1054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) - std r8, VCPU_DSCR(r7) + std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 6c32190..346b1c8 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -15,7 +15,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 5080d16..ec2e2e2 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -9,7 +9,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e99ac27..4af181d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -365,7 +365,7 @@ extern const char * const x86_power_flags[32]; static __always_inline __pure bool __static_cpu_has(u16 bit) { #if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 - asm goto("1: jmp %l[t_no]\n" + asm_volatile_goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index cccd07f..779c2ef 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -29,7 +29,7 @@ extern void e820_setup_gap(void); extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, unsigned long start_addr, unsigned long long end_addr); struct setup_data; -extern void parse_e820_ext(struct setup_data *data); +extern void parse_e820_ext(u64 phys_addr, u32 data_len); #if defined(CONFIG_X86_64) || \ (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 3a16c14..0297669 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:" + asm_volatile_goto("1:" STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abea..174da5f 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -658,15 +658,18 @@ __init void e820_setup_gap(void) * boot_params.e820_map, others are passed via SETUP_E820_EXT node of * linked list of struct setup_data, which is parsed here. */ -void __init parse_e820_ext(struct setup_data *sdata) +void __init parse_e820_ext(u64 phys_addr, u32 data_len) { int entries; struct e820entry *extmap; + struct setup_data *sdata; + sdata = early_memremap(phys_addr, data_len); entries = sdata->len / sizeof(struct e820entry); extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + early_iounmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 56f7fcf..91964c6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -426,25 +426,23 @@ static void __init reserve_initrd(void) static void __init parse_setup_data(void) { struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; pa_data = boot_params.hdr.setup_data; while (pa_data) { - u32 data_len, map_len; + u32 data_len, map_len, data_type; map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), (u64)sizeof(struct setup_data)); data = early_memremap(pa_data, map_len); data_len = data->len + sizeof(struct setup_data); - if (data_len > map_len) { - early_iounmap(data, map_len); - data = early_memremap(pa_data, data_len); - map_len = data_len; - } + data_type = data->type; + pa_next = data->next; + early_iounmap(data, map_len); - switch (data->type) { + switch (data_type) { case SETUP_E820_EXT: - parse_e820_ext(data); + parse_e820_ext(pa_data, data_len); break; case SETUP_DTB: add_dtb(pa_data); @@ -452,8 +450,7 @@ static void __init parse_setup_data(void) default: break; } - pa_data = data->next; - early_iounmap(data, map_len); + pa_data = pa_next; } } diff --git a/drivers/char/random.c b/drivers/char/random.c index 35487e8..81eefa1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1462,12 +1462,11 @@ ctl_table random_table[] = { static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init random_int_secret_init(void) +int random_int_secret_init(void) { get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. Similar to urandom but diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2667d6d..ab95259 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3946,8 +3946,6 @@ static void intel_connector_check_state(struct intel_connector *connector) * consider. */ void intel_connector_dpms(struct drm_connector *connector, int mode) { - struct intel_encoder *encoder = intel_attached_encoder(connector); - /* All the simple cases only support two dpms states. */ if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; @@ -3958,10 +3956,8 @@ void intel_connector_dpms(struct drm_connector *connector, int mode) connector->dpms = mode; /* Only need to change hw state when actually enabled */ - if (encoder->base.crtc) - intel_encoder_dpms(encoder, mode); - else - WARN_ON(encoder->connectors_active != false); + if (connector->encoder) + intel_encoder_dpms(to_intel_encoder(connector->encoder), mode); intel_modeset_check_state(connector->dev); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2068df1..8b6b0ba 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2990,7 +2990,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.sx_max_export_size = 256; rdev->config.evergreen.sx_max_export_pos_size = 64; rdev->config.evergreen.sx_max_export_smx_size = 192; - rdev->config.evergreen.max_hw_contexts = 8; + rdev->config.evergreen.max_hw_contexts = 4; rdev->config.evergreen.sq_num_cf_insts = 2; rdev->config.evergreen.sc_prim_fifo_size = 0x40; diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 9490972..150e318 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -1104,7 +1104,7 @@ * 6. COMMAND [29:22] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -1119,7 +1119,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 79df558..2fd2241 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1259,7 +1259,7 @@ */ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index bbed4af..f9ebf2b 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -37,8 +37,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) struct radeon_bo **gtt_obj = NULL; struct radeon_fence *fence = NULL; uint64_t gtt_addr, vram_addr; - unsigned i, n, size; - int r, ring; + unsigned n, size; + int i, r, ring; switch (flag) { case RADEON_TEST_COPY_DMA: diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 8c68e67..495f41f 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -928,7 +928,7 @@ * 6. COMMAND [30:21] | BYTE_COUNT [20:0] */ # define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) - /* 0 - SRC_ADDR + /* 0 - DST_ADDR * 1 - GDS */ # define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) @@ -943,7 +943,7 @@ # define PACKET3_CP_DMA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) /* 0 - none * 1 - 8 in 16 * 2 - 8 in 32 diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 98814d1..3288f13 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -230,6 +230,7 @@ static int send_argument(const char *key) static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) { + u8 status, data = 0; int i; if (send_command(cmd) || send_argument(key)) { @@ -237,6 +238,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) return -EIO; } + /* This has no effect on newer (2012) SMCs */ if (send_byte(len, APPLESMC_DATA_PORT)) { pr_warn("%.4s: read len fail\n", key); return -EIO; @@ -250,6 +252,17 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) buffer[i] = inb(APPLESMC_DATA_PORT); } + /* Read the data port until bit0 is cleared */ + for (i = 0; i < 16; i++) { + udelay(APPLESMC_MIN_WAIT); + status = inb(APPLESMC_CMD_PORT); + if (!(status & 0x01)) + break; + data = inb(APPLESMC_DATA_PORT); + } + if (i) + pr_warn("flushed %d bytes, last value is: %d\n", i, data); + return 0; } diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index e02f9e3..b06be8e 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -941,6 +941,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) /* * ProDB0017052: Clear ARDY bit twice */ + if (stat & OMAP_I2C_STAT_ARDY) + omap_i2c_ack_stat(dev, OMAP_I2C_STAT_ARDY); + if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | OMAP_I2C_STAT_AL)) { omap_i2c_ack_stat(dev, (OMAP_I2C_STAT_RRDY | diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index b8a9245..9ad2bd3 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -310,7 +310,8 @@ static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd, case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: - return put_user(0, p); + error = put_user(0, p); + break; case WDIOC_KEEPALIVE: ts72xx_wdt_kick(wdt); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17f3064..1e2288d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8146,7 +8146,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, new_dentry->d_name.name, new_dentry->d_name.len); diff --git a/fs/dcache.c b/fs/dcache.c index f09b908..da89cdf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, return memcpy(buffer, temp, sz); } +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} + /* * Write full pathname from the root of the filesystem into the buffer. */ diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c081e34..03e9beb 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1350,6 +1350,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3f868a..4e5f332 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = hugetlb_dname + .d_dname = simple_dname }; /* diff --git a/fs/statfs.c b/fs/statfs.c index c219e733..083dc0a 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -94,7 +94,7 @@ retry: int fd_statfs(int fd, struct kstatfs *st) { - struct fd f = fdget(fd); + struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { error = vfs_statfs(&f.file->f_path, st); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 842de22..ded4299 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -65,6 +65,21 @@ #define __visible __attribute__((externally_visible)) #endif +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#if GCC_VERSION <= 40801 +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) +#else +# define asm_volatile_goto(x...) do { asm goto(x); } while (0) +#endif #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if GCC_VERSION >= 40400 diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1a6bb81..9be5ac9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -332,6 +332,7 @@ extern int d_validate(struct dentry *, struct dentry *); * helper function for dentry_operations.d_dname() members */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c4d870b..19c19a5 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -22,7 +22,7 @@ struct ipc_ids { int in_use; unsigned short seq; unsigned short seq_max; - struct rw_semaphore rw_mutex; + struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; }; diff --git a/include/linux/random.h b/include/linux/random.h index 3b9377d..6312dd9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); +extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/include/linux/sem.h b/include/linux/sem.h index 53d4265..976ce3a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -12,10 +12,12 @@ struct task_struct; struct sem_array { struct kern_ipc_perm ____cacheline_aligned_in_smp sem_perm; /* permissions .. see ipc.h */ - time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ - struct list_head sem_pending; /* pending operations to be processed */ + struct list_head pending_alter; /* pending operations */ + /* that alter the array */ + struct list_head pending_const; /* pending complex operations */ + /* that do not alter semvals */ struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ diff --git a/init/main.c b/init/main.c index 9484f4b..e83ac04 100644 --- a/init/main.c +++ b/init/main.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -777,6 +778,7 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); + random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) diff --git a/ipc/msg.c b/ipc/msg.c index f8fbe2c..558aa91 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -70,8 +70,6 @@ struct msg_sender { #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) -#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) - static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); static int newque(struct ipc_namespace *, struct ipc_params *); #ifdef CONFIG_PROC_FS @@ -141,27 +139,23 @@ void __init msg_init(void) IPC_MSG_IDS, sysvipc_msg_proc_show); } -/* - * msg_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. - */ -static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id) +static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } -static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns, - int id) +static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, + int id) { - struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; + return ERR_CAST(ipcp); return container_of(ipcp, struct msg_queue, q_perm); } @@ -171,12 +165,21 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) ipc_rmid(&msg_ids(ns), &s->q_perm); } +static void msg_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct msg_queue *msq = ipc_rcu_to_struct(p); + + security_msg_queue_free(msq); + ipc_rcu_free(head); +} + /** * newque - Create a new msg queue * @ns: namespace * @params: ptr to the structure that contains the key and msgflg * - * Called with msg_ids.rw_mutex held (writer) + * Called with msg_ids.rwsem held (writer) */ static int newque(struct ipc_namespace *ns, struct ipc_params *params) { @@ -195,17 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); if (retval) { - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, ipc_rcu_free); return retval; } - /* - * ipc_addid() locks msq - */ + /* ipc_addid() locks msq upon success. */ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); return id; } @@ -218,7 +218,8 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); return msq->q_perm.id; } @@ -264,8 +265,8 @@ static void expunge_all(struct msg_queue *msq, int res) * removes the message queue from message queue ID IDR, and cleans up all the * messages associated with this queue. * - * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held - * before freeque() is called. msg_ids.rw_mutex remains locked on exit. + * msg_ids.rwsem (writer) and the spinlock for this message queue are held + * before freeque() is called. msg_ids.rwsem remains locked on exit. */ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { @@ -275,19 +276,19 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) expunge_all(msq, -EIDRM); ss_wakeup(&msq->q_senders, 1); msg_rmid(ns, msq); - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { atomic_dec(&ns->msg_hdrs); free_msg(msg); } atomic_sub(msq->q_cbytes, &ns->msg_bytes); - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); } /* - * Called with msg_ids.rw_mutex and ipcp locked. + * Called with msg_ids.rwsem and ipcp locked. */ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) { @@ -391,9 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) } /* - * This function handles some msgctl commands which require the rw_mutex + * This function handles some msgctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, struct msqid_ds __user *buf, int version) @@ -408,31 +409,39 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(ns, &msg_ids(ns), msqid, cmd, - &msqid64.msg_perm, msqid64.msg_qbytes); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + down_write(&msg_ids(ns).rwsem); + rcu_read_lock(); + + ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, + &msqid64.msg_perm, msqid64.msg_qbytes); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } msq = container_of(ipcp, struct msg_queue, q_perm); err = security_msg_queue_msgctl(msq, cmd); if (err) - goto out_unlock; + goto out_unlock1; switch (cmd) { case IPC_RMID: + ipc_lock_object(&msq->q_perm); + /* freeque unlocks the ipc object and rcu */ freeque(ns, ipcp); goto out_up; case IPC_SET: if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; - goto out_unlock; + goto out_unlock1; } + ipc_lock_object(&msq->q_perm); err = ipc_update_perm(&msqid64.msg_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; msq->q_qbytes = msqid64.msg_qbytes; @@ -448,25 +457,23 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, break; default: err = -EINVAL; + goto out_unlock1; } -out_unlock: - msg_unlock(msq); + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); out_up: - up_write(&msg_ids(ns).rw_mutex); + up_write(&msg_ids(ns).rwsem); return err; } -SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +static int msgctl_nolock(struct ipc_namespace *ns, int msqid, + int cmd, int version, void __user *buf) { + int err; struct msg_queue *msq; - int err, version; - struct ipc_namespace *ns; - - if (msqid < 0 || cmd < 0) - return -EINVAL; - - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; switch (cmd) { case IPC_INFO: @@ -477,6 +484,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + /* * We must not return kernel stack data. * due to padding, it's not enough @@ -492,7 +500,7 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) msginfo.msgmnb = ns->msg_ctlmnb; msginfo.msgssz = MSGSSZ; msginfo.msgseg = MSGSEG; - down_read(&msg_ids(ns).rw_mutex); + down_read(&msg_ids(ns).rwsem); if (cmd == MSG_INFO) { msginfo.msgpool = msg_ids(ns).in_use; msginfo.msgmap = atomic_read(&ns->msg_hdrs); @@ -503,12 +511,13 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) msginfo.msgtql = MSGTQL; } max_id = ipc_get_maxid(&msg_ids(ns)); - up_read(&msg_ids(ns).rw_mutex); + up_read(&msg_ids(ns).rwsem); if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) return -EFAULT; return (max_id < 0) ? 0 : max_id; } - case MSG_STAT: /* msqid is an index rather than a msg queue id */ + + case MSG_STAT: case IPC_STAT: { struct msqid64_ds tbuf; @@ -517,17 +526,25 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (!buf) return -EFAULT; + memset(&tbuf, 0, sizeof(tbuf)); + + rcu_read_lock(); if (cmd == MSG_STAT) { - msq = msg_lock(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = msq->q_perm.id; } else { - msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) - return PTR_ERR(msq); + msq = msq_obtain_object_check(ns, msqid); + if (IS_ERR(msq)) { + err = PTR_ERR(msq); + goto out_unlock; + } success_return = 0; } + err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; @@ -536,8 +553,6 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) if (err) goto out_unlock; - memset(&tbuf, 0, sizeof(tbuf)); - kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); tbuf.msg_stime = msq->q_stime; tbuf.msg_rtime = msq->q_rtime; @@ -547,24 +562,48 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) tbuf.msg_qbytes = msq->q_qbytes; tbuf.msg_lspid = msq->q_lspid; tbuf.msg_lrpid = msq->q_lrpid; - msg_unlock(msq); + rcu_read_unlock(); + if (copy_msqid_to_user(buf, &tbuf, version)) return -EFAULT; return success_return; } - case IPC_SET: - case IPC_RMID: - err = msgctl_down(ns, msqid, cmd, buf, version); - return err; + default: - return -EINVAL; + return -EINVAL; } + return err; out_unlock: - msg_unlock(msq); + rcu_read_unlock(); return err; } +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) +{ + int version; + struct ipc_namespace *ns; + + if (msqid < 0 || cmd < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case MSG_INFO: + case MSG_STAT: /* msqid is an index rather than a msg queue id */ + case IPC_STAT: + return msgctl_nolock(ns, msqid, cmd, version, buf); + case IPC_SET: + case IPC_RMID: + return msgctl_down(ns, msqid, cmd, buf, version); + default: + return -EINVAL; + } +} + static int testmsg(struct msg_msg *msg, long type, int mode) { switch(mode) @@ -640,22 +679,31 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, msg->m_type = mtype; msg->m_ts = msgsz; - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { err = PTR_ERR(msq); - goto out_free; + goto out_unlock1; } + ipc_lock_object(&msq->q_perm); + for (;;) { struct msg_sender s; err = -EACCES; if (ipcperms(ns, &msq->q_perm, S_IWUGO)) - goto out_unlock_free; + goto out_unlock0; + + /* raced with RMID? */ + if (msq->q_perm.deleted) { + err = -EIDRM; + goto out_unlock0; + } err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) - goto out_unlock_free; + goto out_unlock0; if (msgsz + msq->q_cbytes <= msq->q_qbytes && 1 + msq->q_qnum <= msq->q_qbytes) { @@ -665,32 +713,37 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, /* queue full, wait: */ if (msgflg & IPC_NOWAIT) { err = -EAGAIN; - goto out_unlock_free; + goto out_unlock0; } + ss_add(msq, &s); if (!ipc_rcu_getref(msq)) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); - ipc_lock_by_ptr(&msq->q_perm); - ipc_rcu_putref(msq); + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + + ipc_rcu_putref(msq, ipc_rcu_free); if (msq->q_perm.deleted) { err = -EIDRM; - goto out_unlock_free; + goto out_unlock0; } + ss_del(&s); if (signal_pending(current)) { err = -ERESTARTNOHAND; - goto out_unlock_free; + goto out_unlock0; } - } + } msq->q_lspid = task_tgid_vnr(current); msq->q_stime = get_seconds(); @@ -706,9 +759,10 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, err = 0; msg = NULL; -out_unlock_free: - msg_unlock(msq); -out_free: +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (msg != NULL) free_msg(msg); return err; @@ -817,21 +871,19 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) return found ?: ERR_PTR(-EAGAIN); } - -long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, - int msgflg, +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) { - struct msg_queue *msq; - struct msg_msg *msg; int mode; + struct msg_queue *msq; struct ipc_namespace *ns; - struct msg_msg *copy = NULL; + struct msg_msg *msg, *copy = NULL; ns = current->nsproxy->ipc_ns; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) @@ -839,8 +891,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, } mode = convert_mode(&msgtyp, msgflg); - msq = msg_lock_check(ns, msqid); + rcu_read_lock(); + msq = msq_obtain_object_check(ns, msqid); if (IS_ERR(msq)) { + rcu_read_unlock(); free_copy(copy); return PTR_ERR(msq); } @@ -850,10 +904,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) - goto out_unlock; + goto out_unlock1; - msg = find_msg(msq, &msgtyp, mode); + ipc_lock_object(&msq->q_perm); + + /* raced with RMID? */ + if (msq->q_perm.deleted) { + msg = ERR_PTR(-EIDRM); + goto out_unlock0; + } + msg = find_msg(msq, &msgtyp, mode); if (!IS_ERR(msg)) { /* * Found a suitable message. @@ -861,7 +922,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); - goto out_unlock; + goto out_unlock0; } /* * If we are copying, then do not unlink message and do @@ -869,8 +930,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, */ if (msgflg & MSG_COPY) { msg = copy_msg(msg, copy); - goto out_unlock; + goto out_unlock0; } + list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -879,14 +941,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, atomic_sub(msg->m_ts, &ns->msg_bytes); atomic_dec(&ns->msg_hdrs); ss_wakeup(&msq->q_senders, 0); - msg_unlock(msq); - break; + + goto out_unlock0; } + /* No message waiting. Wait for a message */ if (msgflg & IPC_NOWAIT) { msg = ERR_PTR(-ENOMSG); - goto out_unlock; + goto out_unlock0; } + list_add_tail(&msr_d.r_list, &msq->q_receivers); msr_d.r_tsk = current; msr_d.r_msgtype = msgtyp; @@ -897,8 +961,9 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; - msg_unlock(msq); + ipc_unlock_object(&msq->q_perm); + rcu_read_unlock(); schedule(); /* Lockless receive, part 1: @@ -909,7 +974,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * Prior to destruction, expunge_all(-EIRDM) changes r_msg. * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. * rcu_read_lock() prevents preemption between reading r_msg - * and the spin_lock() inside ipc_lock_by_ptr(). + * and acquiring the q_perm.lock in ipc_lock_object(). */ rcu_read_lock(); @@ -928,32 +993,34 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, * If there is a message or an error then accept it without * locking. */ - if (msg != ERR_PTR(-EAGAIN)) { - rcu_read_unlock(); - break; - } + if (msg != ERR_PTR(-EAGAIN)) + goto out_unlock1; /* Lockless receive, part 3: * Acquire the queue spinlock. */ - ipc_lock_by_ptr(&msq->q_perm); - rcu_read_unlock(); + ipc_lock_object(&msq->q_perm); /* Lockless receive, part 4: * Repeat test after acquiring the spinlock. */ msg = (struct msg_msg*)msr_d.r_msg; if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock; + goto out_unlock0; list_del(&msr_d.r_list); if (signal_pending(current)) { msg = ERR_PTR(-ERESTARTNOHAND); -out_unlock: - msg_unlock(msq); - break; + goto out_unlock0; } + + ipc_unlock_object(&msq->q_perm); } + +out_unlock0: + ipc_unlock_object(&msq->q_perm); +out_unlock1: + rcu_read_unlock(); if (IS_ERR(msg)) { free_copy(copy); return PTR_ERR(msg); diff --git a/ipc/namespace.c b/ipc/namespace.c index 7ee61bf..aba9a58 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, int next_id; int total, in_use; - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); in_use = ids->in_use; @@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, perm = idr_find(&ids->ipcs_idr, next_id); if (perm == NULL) continue; - ipc_lock_by_ptr(perm); + rcu_read_lock(); + ipc_lock_object(perm); free(ns, perm); total++; } - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); } static void free_ipc_ns(struct ipc_namespace *ns) diff --git a/ipc/sem.c b/ipc/sem.c index 70480a3..8c4f59b 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -95,8 +95,12 @@ struct sem { int semval; /* current value */ int sempid; /* pid of last operation */ spinlock_t lock; /* spinlock for fine-grained semtimedop */ - struct list_head sem_pending; /* pending single-sop operations */ -}; + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; /* One queue for each sleeping process in the system. */ struct sem_queue { @@ -150,12 +154,15 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ /* - * linked list protection: + * Locking: * sem_undo.id_next, - * sem_array.sem_pending{,last}, - * sem_array.sem_undo: sem_lock() for read/write + * sem_array.complex_count, + * sem_array.pending{_alter,_cont}, + * sem_array.sem_undo: global sem_lock() for read/write * sem_undo.proc_next: only "current" is allowed to read/write that field. * + * sem_array.sem_base[i].pending_{const,alter}: + * global or semaphore sem_lock() for read/write */ #define sc_semmsl sem_ctls[0] @@ -189,77 +196,176 @@ void __init sem_init (void) IPC_SEM_IDS, sysvipc_sem_proc_show); } +/** + * unmerge_queues - unmerge queues, if possible. + * @sma: semaphore array + * + * The function unmerges the wait queues if complex_count is 0. + * It must be called prior to dropping the global semaphore array lock. + */ +static void unmerge_queues(struct sem_array *sma) +{ + struct sem_queue *q, *tq; + + /* complex operations still around? */ + if (sma->complex_count) + return; + /* + * We will switch back to simple mode. + * Move all pending operation back into the per-semaphore + * queues. + */ + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { + struct sem *curr; + curr = &sma->sem_base[q->sops[0].sem_num]; + + list_add_tail(&q->list, &curr->pending_alter); + } + INIT_LIST_HEAD(&sma->pending_alter); +} + +/** + * merge_queues - Merge single semop queues into global queue + * @sma: semaphore array + * + * This function merges all per-semaphore queues into the global queue. + * It is necessary to achieve FIFO ordering for the pending single-sop + * operations when a multi-semop operation must sleep. + * Only the alter operations must be moved, the const operations can stay. + */ +static void merge_queues(struct sem_array *sma) +{ + int i; + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; + + list_splice_init(&sem->pending_alter, &sma->pending_alter); + } +} + +static void sem_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct sem_array *sma = ipc_rcu_to_struct(p); + + security_sem_free(sma); + ipc_rcu_free(head); +} + +/* + * Wait until all currently ongoing simple ops have completed. + * Caller must own sem_perm.lock. + * New simple ops cannot start, because simple ops first check + * that sem_perm.lock is free. + * that a) sem_perm.lock is free and b) complex_count is 0. + */ +static void sem_wait_array(struct sem_array *sma) +{ + int i; + struct sem *sem; + + if (sma->complex_count) { + /* The thread that increased sma->complex_count waited on + * all sem->lock locks. Thus we don't need to wait again. + */ + return; + } + + for (i = 0; i < sma->sem_nsems; i++) { + sem = sma->sem_base + i; + spin_unlock_wait(&sem->lock); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have * multiple semaphores in our own semops, or we need to look at * semaphores from other pending complex operations. - * - * Carefully guard against sma->complex_count changing between zero - * and non-zero while we are spinning for the lock. The value of - * sma->complex_count cannot change while we are holding the lock, - * so sem_unlock should be fine. - * - * The global lock path checks that all the local locks have been released, - * checking each local lock once. This means that the local lock paths - * cannot start their critical sections while the global lock is held. */ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { - int locknum; - again: - if (nsops == 1 && !sma->complex_count) { - struct sem *sem = sma->sem_base + sops->sem_num; + struct sem *sem; - /* Lock just the semaphore we are interested in. */ - spin_lock(&sem->lock); + if (nsops != 1) { + /* Complex operation - acquire a full lock */ + ipc_lock_object(&sma->sem_perm); - /* - * If sma->complex_count was set while we were spinning, - * we may need to look at things we did not lock here. + /* And wait until all simple ops that are processed + * right now have dropped their locks. */ - if (unlikely(sma->complex_count)) { - spin_unlock(&sem->lock); - goto lock_array; - } + sem_wait_array(sma); + return -1; + } + + /* + * Only one semaphore affected - try to optimize locking. + * The rules are: + * - optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * - The test for enqueued complex ops is simple: + * sma->complex_count != 0 + * - Testing for complex ops that are processed right now is + * a bit more difficult. Complex ops acquire the full lock + * and first wait that the running simple ops have completed. + * (see above) + * Thus: If we own a simple lock and the global lock is free + * and complex_count is now 0, then it will stay 0 and + * thus just locking sem->lock is sufficient. + */ + sem = sma->sem_base + sops->sem_num; + if (sma->complex_count == 0) { /* - * Another process is holding the global lock on the - * sem_array; we cannot enter our critical section, - * but have to wait for the global lock to be released. + * It appears that no complex operation is around. + * Acquire the per-semaphore lock. */ - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { - spin_unlock(&sem->lock); - spin_unlock_wait(&sma->sem_perm.lock); - goto again; + spin_lock(&sem->lock); + + /* Then check that the global lock is free */ + if (!spin_is_locked(&sma->sem_perm.lock)) { + /* spin_is_locked() is not a memory barrier */ + smp_mb(); + + /* Now repeat the test of complex_count: + * It can't change anymore until we drop sem->lock. + * Thus: if is now 0, then it will stay 0. + */ + if (sma->complex_count == 0) { + /* fast path successful! */ + return sops->sem_num; + } } + spin_unlock(&sem->lock); + } + + /* slow path: acquire the full lock */ + ipc_lock_object(&sma->sem_perm); - locknum = sops->sem_num; + if (sma->complex_count == 0) { + /* False alarm: + * There is no complex operation, thus we can switch + * back to the fast path. + */ + spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); + return sops->sem_num; } else { - int i; - /* - * Lock the semaphore array, and wait for all of the - * individual semaphore locks to go away. The code - * above ensures no new single-lock holders will enter - * their critical section while the array lock is held. + /* Not a false alarm, thus complete the sequence for a + * full lock. */ - lock_array: - spin_lock(&sma->sem_perm.lock); - for (i = 0; i < sma->sem_nsems; i++) { - struct sem *sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); - } - locknum = -1; + sem_wait_array(sma); + return -1; } - return locknum; } static inline void sem_unlock(struct sem_array *sma, int locknum) { if (locknum == -1) { - spin_unlock(&sma->sem_perm.lock); + unmerge_queues(sma); + ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; spin_unlock(&sem->lock); @@ -267,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) } /* - * sem_lock_(check_) routines are called in the paths where the rw_mutex + * sem_lock_(check_) routines are called in the paths where the rwsem * is not held. * * The caller holds the RCU read lock. @@ -319,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns static inline void sem_lock_and_putref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_putref(sma); -} - -static inline void sem_putref(struct sem_array *sma) -{ - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -337,7 +438,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * Without the check/retry algorithm a lockless wakeup is possible: * - queue.status is initialized to -EINTR before blocking. * - wakeup is performed by - * * unlinking the queue entry from sma->sem_pending + * * unlinking the queue entry from the pending list * * setting queue.status to IN_WAKEUP * This is the notification for the blocked thread that a * result value is imminent. @@ -371,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * @ns: namespace * @params: ptr to the structure that contains key, semflg and nsems * - * Called with sem_ids.rw_mutex held (as a writer) + * Called with sem_ids.rwsem held (as a writer) */ static int newary(struct ipc_namespace *ns, struct ipc_params *params) @@ -403,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); if (retval) { - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return retval; } id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (id < 0) { - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); return id; } ns->used_sems += nsems; @@ -418,12 +518,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_base = (struct sem *) &sma[1]; for (i = 0; i < nsems; i++) { - INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); + INIT_LIST_HEAD(&sma->sem_base[i].pending_const); spin_lock_init(&sma->sem_base[i].lock); } sma->complex_count = 0; - INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->pending_alter); + INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); @@ -435,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) { @@ -446,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) } /* - * Called with sem_ids.rw_mutex and ipcp locked. + * Called with sem_ids.rwsem and ipcp locked. */ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) @@ -482,12 +584,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* - * Determine whether a sequence of semaphore operations would succeed - * all at once. Return 0 if yes, 1 if need to sleep, else return error code. +/** perform_atomic_semop - Perform (if possible) a semaphore operation + * @sma: semaphore array + * @sops: array with operations that should be checked + * @nsems: number of sops + * @un: undo array + * @pid: pid that did the change + * + * Returns 0 if the operation was possible. + * Returns 1 if the operation is impossible, the caller must sleep. + * Negative values are error codes. */ -static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, +static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, int nsops, struct sem_undo *un, int pid) { int result, sem_op; @@ -609,60 +718,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) * update_queue is O(N^2) when it restarts scanning the whole queue of * waiting operations. Therefore this function checks if the restart is * really necessary. It is called after a previously waiting operation - * was completed. + * modified the array. + * Note that wait-for-zero operations are handled without restart. */ static int check_restart(struct sem_array *sma, struct sem_queue *q) { - struct sem *curr; - struct sem_queue *h; - - /* if the operation didn't modify the array, then no restart */ - if (q->alter == 0) - return 0; - - /* pending complex operations are too difficult to analyse */ - if (sma->complex_count) + /* pending complex alter operations are too difficult to analyse */ + if (!list_empty(&sma->pending_alter)) return 1; /* we were a sleeping complex operation. Too difficult */ if (q->nsops > 1) return 1; - curr = sma->sem_base + q->sops[0].sem_num; + /* It is impossible that someone waits for the new value: + * - complex operations always restart. + * - wait-for-zero are handled seperately. + * - q is a previously sleeping simple operation that + * altered the array. It must be a decrement, because + * simple increments never sleep. + * - If there are older (higher priority) decrements + * in the queue, then they have observed the original + * semval value and couldn't proceed. The operation + * decremented to value - thus they won't proceed either. + */ + return 0; +} - /* No-one waits on this queue */ - if (list_empty(&curr->sem_pending)) - return 0; +/** + * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks + * @sma: semaphore array. + * @semnum: semaphore that was modified. + * @pt: list head for the tasks that must be woken up. + * + * wake_const_ops must be called after a semaphore in a semaphore array + * was set to 0. If complex const operations are pending, wake_const_ops must + * be called with semnum = -1, as well as with the number of each modified + * semaphore. + * The tasks that must be woken up are added to @pt. The return code + * is stored in q->pid. + * The function returns 1 if at least one operation was completed successfully. + */ +static int wake_const_ops(struct sem_array *sma, int semnum, + struct list_head *pt) +{ + struct sem_queue *q; + struct list_head *walk; + struct list_head *pending_list; + int semop_completed = 0; - /* the new semaphore value */ - if (curr->semval) { - /* It is impossible that someone waits for the new value: - * - q is a previously sleeping simple operation that - * altered the array. It must be a decrement, because - * simple increments never sleep. - * - The value is not 0, thus wait-for-zero won't proceed. - * - If there are older (higher priority) decrements - * in the queue, then they have observed the original - * semval value and couldn't proceed. The operation - * decremented to value - thus they won't proceed either. + if (semnum == -1) + pending_list = &sma->pending_const; + else + pending_list = &sma->sem_base[semnum].pending_const; + + walk = pending_list->next; + while (walk != pending_list) { + int error; + + q = container_of(walk, struct sem_queue, list); + walk = walk->next; + + error = perform_atomic_semop(sma, q->sops, q->nsops, + q->undo, q->pid); + + if (error <= 0) { + /* operation completed, remove from queue & wakeup */ + + unlink_queue(sma, q); + + wake_up_sem_queue_prepare(pt, q, error); + if (error == 0) + semop_completed = 1; + } + } + return semop_completed; +} + +/** + * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks + * @sma: semaphore array + * @sops: operations that were performed + * @nsops: number of operations + * @pt: list head of the tasks that must be woken up. + * + * do_smart_wakeup_zero() checks all required queue for wait-for-zero + * operations, based on the actual changes that were performed on the + * semaphore array. + * The function returns 1 if at least one operation was completed successfully. + */ +static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, + int nsops, struct list_head *pt) +{ + int i; + int semop_completed = 0; + int got_zero = 0; + + /* first: the per-semaphore queues, if known */ + if (sops) { + for (i = 0; i < nsops; i++) { + int num = sops[i].sem_num; + + if (sma->sem_base[num].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, num, pt); + } + } + } else { + /* + * No sops means modified semaphores not known. + * Assume all were changed. */ - BUG_ON(q->sops[0].sem_op >= 0); - return 0; + for (i = 0; i < sma->sem_nsems; i++) { + if (sma->sem_base[i].semval == 0) { + got_zero = 1; + semop_completed |= wake_const_ops(sma, i, pt); + } + } } /* - * semval is 0. Check if there are wait-for-zero semops. - * They must be the first entries in the per-semaphore queue + * If one of the modified semaphores got 0, + * then check the global queue, too. */ - h = list_first_entry(&curr->sem_pending, struct sem_queue, list); - BUG_ON(h->nsops != 1); - BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); + if (got_zero) + semop_completed |= wake_const_ops(sma, -1, pt); - /* Yes, there is a wait-for-zero semop. Restart */ - if (h->sops[0].sem_op == 0) - return 1; - - /* Again - no-one is waiting for the new value. */ - return 0; + return semop_completed; } @@ -678,6 +859,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) * semaphore. * The tasks that must be woken up are added to @pt. The return code * is stored in q->pid. + * The function internally checks if const operations can now succeed. + * * The function return 1 if at least one semop was completed successfully. */ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) @@ -688,9 +871,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) int semop_completed = 0; if (semnum == -1) - pending_list = &sma->sem_pending; + pending_list = &sma->pending_alter; else - pending_list = &sma->sem_base[semnum].sem_pending; + pending_list = &sma->sem_base[semnum].pending_alter; again: walk = pending_list->next; @@ -702,16 +885,15 @@ again: /* If we are scanning the single sop, per-semaphore list of * one semaphore and that semaphore is 0, then it is not - * necessary to scan the "alter" entries: simple increments + * necessary to scan further: simple increments * that affect only one entry succeed immediately and cannot * be in the per semaphore pending queue, and decrements * cannot be successful if the value is already 0. */ - if (semnum != -1 && sma->sem_base[semnum].semval == 0 && - q->alter) + if (semnum != -1 && sma->sem_base[semnum].semval == 0) break; - error = try_atomic_semop(sma, q->sops, q->nsops, + error = perform_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ @@ -724,6 +906,7 @@ again: restart = 0; } else { semop_completed = 1; + do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); restart = check_restart(sma, q); } @@ -735,6 +918,24 @@ again: } /** + * set_semotime(sma, sops) - set sem_otime + * @sma: semaphore array + * @sops: operations that modified the array, may be NULL + * + * sem_otime is replicated to avoid cache line trashing. + * This function sets one instance to the current time. + */ +static void set_semotime(struct sem_array *sma, struct sembuf *sops) +{ + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } +} + +/** * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue * @sma: semaphore array * @sops: operations that were performed @@ -742,8 +943,8 @@ again: * @otime: force setting otime * @pt: list head of the tasks that must be woken up. * - * do_smart_update() does the required called to update_queue, based on the - * actual changes that were performed on the semaphore array. + * do_smart_update() does the required calls to update_queue and wakeup_zero, + * based on the actual changes that were performed on the semaphore array. * Note that the function does not do the actual wake-up: the caller is * responsible for calling wake_up_sem_queue_do(@pt). * It is safe to perform this call after dropping all locks. @@ -752,52 +953,42 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop int otime, struct list_head *pt) { int i; - int progress; - - progress = 1; -retry_global: - if (sma->complex_count) { - if (update_queue(sma, -1, pt)) { - progress = 1; - otime = 1; - sops = NULL; - } - } - if (!progress) - goto done; - if (!sops) { - /* No semops; something special is going on. */ - for (i = 0; i < sma->sem_nsems; i++) { - if (update_queue(sma, i, pt)) { - otime = 1; - progress = 1; - } - } - goto done_checkretry; - } + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); - /* Check the semaphores that were modified. */ - for (i = 0; i < nsops; i++) { - if (sops[i].sem_op > 0 || - (sops[i].sem_op < 0 && - sma->sem_base[sops[i].sem_num].semval == 0)) - if (update_queue(sma, sops[i].sem_num, pt)) { - otime = 1; - progress = 1; + if (!list_empty(&sma->pending_alter)) { + /* semaphore array uses the global queue - just process it. */ + otime |= update_queue(sma, -1, pt); + } else { + if (!sops) { + /* + * No sops, thus the modified semaphores are not + * known. Check all. + */ + for (i = 0; i < sma->sem_nsems; i++) + otime |= update_queue(sma, i, pt); + } else { + /* + * Check the semaphores that were increased: + * - No complex ops, thus all sleeping ops are + * decrease. + * - if we decreased the value, then any sleeping + * semaphore ops wont be able to run: If the + * previous value was too small, then the new + * value will be too small, too. + */ + for (i = 0; i < nsops; i++) { + if (sops[i].sem_op > 0) { + otime |= update_queue(sma, + sops[i].sem_num, pt); + } } + } } -done_checkretry: - if (progress) { - progress = 0; - goto retry_global; - } -done: if (otime) - sma->sem_otime = get_seconds(); + set_semotime(sma, sops); } - /* The following counts are associated to each semaphore: * semncnt number of tasks waiting on semval being nonzero * semzcnt number of tasks waiting on semval being zero @@ -813,14 +1004,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) semncnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_alter, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -839,14 +1030,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semzcnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { + list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { struct sembuf * sops = q->sops; BUG_ON(sops->sem_num != semnum); if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) semzcnt++; } - list_for_each_entry(q, &sma->sem_pending, list) { + list_for_each_entry(q, &sma->pending_const, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -859,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } -/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked - * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex +/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked + * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) @@ -872,7 +1063,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) int i; /* Free the existing undo structures for this semaphore set. */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { list_del(&un->list_id); spin_lock(&un->ulp->lock); @@ -884,13 +1075,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Wake up all pending processes and let them fail with EIDRM. */ INIT_LIST_HEAD(&tasks); - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sma->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + + list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } for (i = 0; i < sma->sem_nsems; i++) { struct sem *sem = sma->sem_base + i; - list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sem->pending_const, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } @@ -903,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); } static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) @@ -931,6 +1130,21 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, } } +static time_t get_semotime(struct sem_array *sma) +{ + int i; + time_t res; + + res = sma->sem_base[0].sem_otime; + for (i = 1; i < sma->sem_nsems; i++) { + time_t to = sma->sem_base[i].sem_otime; + + if (to > res) + res = to; + } + return res; +} + static int semctl_nolock(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) { @@ -957,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semmnu = SEMMNU; seminfo.semmap = SEMMAP; seminfo.semume = SEMUME; - down_read(&sem_ids(ns).rw_mutex); + down_read(&sem_ids(ns).rwsem); if (cmd == SEM_INFO) { seminfo.semusz = sem_ids(ns).in_use; seminfo.semaem = ns->used_sems; @@ -966,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, seminfo.semaem = SEMAEM; } max_id = ipc_get_maxid(&sem_ids(ns)); - up_read(&sem_ids(ns).rw_mutex); + up_read(&sem_ids(ns).rwsem); if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; return (max_id < 0) ? 0: max_id; @@ -1004,9 +1218,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, goto out_unlock; kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); - tbuf.sem_otime = sma->sem_otime; - tbuf.sem_ctime = sma->sem_ctime; - tbuf.sem_nsems = sma->sem_nsems; + tbuf.sem_otime = get_semotime(sma); + tbuf.sem_ctime = sma->sem_ctime; + tbuf.sem_nsems = sma->sem_nsems; rcu_read_unlock(); if (copy_semid_to_user(p, &tbuf, version)) return -EFAULT; @@ -1070,7 +1284,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, curr = &sma->sem_base[semnum]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; @@ -1133,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } @@ -1169,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } } if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -ERANGE; goto out_free; } @@ -1199,7 +1413,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; @@ -1272,9 +1486,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) } /* - * This function handles some semctl commands which require the rw_mutex + * This function handles some semctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int semctl_down(struct ipc_namespace *ns, int semid, int cmd, int version, void __user *p) @@ -1289,42 +1503,46 @@ static int semctl_down(struct ipc_namespace *ns, int semid, return -EFAULT; } + down_write(&sem_ids(ns).rwsem); + rcu_read_lock(); + ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); - if (err) { - rcu_read_unlock(); - goto out_up; - } + if (err) + goto out_unlock1; - switch(cmd){ + switch (cmd) { case IPC_RMID: sem_lock(sma, NULL, -1); + /* freeary unlocks the ipc object and rcu */ freeary(ns, ipcp); goto out_up; case IPC_SET: sem_lock(sma, NULL, -1); err = ipc_update_perm(&semid64.sem_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; sma->sem_ctime = get_seconds(); break; default: - rcu_read_unlock(); err = -EINVAL; - goto out_up; + goto out_unlock1; } -out_unlock: +out_unlock0: sem_unlock(sma, -1); +out_unlock1: rcu_read_unlock(); out_up: - up_write(&sem_ids(ns).rw_mutex); + up_write(&sem_ids(ns).rwsem); return err; } @@ -1466,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return ERR_PTR(-ENOMEM); } @@ -1496,7 +1714,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) new->semid = semid; assert_spin_locked(&ulp->lock); list_add_rcu(&new->list_proc, &ulp->list_proc); - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_add(&new->list_id, &sma->list_id); un = new; @@ -1533,7 +1751,6 @@ static int get_queue_result(struct sem_queue *q) return error; } - SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, unsigned, nsops, const struct timespec __user *, timeout) { @@ -1631,13 +1848,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (un && un->semid == -1) goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); - if (error <= 0) { - if (alter && error == 0) + error = perform_atomic_semop(sma, sops, nsops, un, + task_tgid_vnr(current)); + if (error == 0) { + /* If the operation was successful, then do + * the required updates. + */ + if (alter) do_smart_update(sma, sops, nsops, 1, &tasks); - - goto out_unlock_free; + else + set_semotime(sma, sops); } + if (error <= 0) + goto out_unlock_free; /* We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. @@ -1653,15 +1876,27 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, struct sem *curr; curr = &sma->sem_base[sops->sem_num]; - if (alter) - list_add_tail(&queue.list, &curr->sem_pending); - else - list_add(&queue.list, &curr->sem_pending); + if (alter) { + if (sma->complex_count) { + list_add_tail(&queue.list, + &sma->pending_alter); + } else { + + list_add_tail(&queue.list, + &curr->pending_alter); + } + } else { + list_add_tail(&queue.list, &curr->pending_const); + } } else { + if (!sma->complex_count) + merge_queues(sma); + if (alter) - list_add_tail(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_alter); else - list_add(&queue.list, &sma->sem_pending); + list_add_tail(&queue.list, &sma->pending_const); + sma->complex_count++; } @@ -1833,7 +2068,7 @@ void exit_sem(struct task_struct *tsk) } /* remove un from the linked lists */ - assert_spin_locked(&sma->sem_perm.lock); + ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); spin_lock(&ulp->lock); @@ -1882,6 +2117,17 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) { struct user_namespace *user_ns = seq_user_ns(s); struct sem_array *sma = it; + time_t sem_otime; + + /* + * The proc interface isn't aware of sem_lock(), it calls + * ipc_lock_object() directly (in sysvipc_find_ipc). + * In order to stay compatible with sem_lock(), we must wait until + * all simple semop() calls have left their critical regions. + */ + sem_wait_array(sma); + + sem_otime = get_semotime(sma); return seq_printf(s, "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", @@ -1893,7 +2139,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) from_kgid_munged(user_ns, sma->sem_perm.gid), from_kuid_munged(user_ns, sma->sem_perm.cuid), from_kgid_munged(user_ns, sma->sem_perm.cgid), - sma->sem_otime, + sem_otime, sma->sem_ctime); } #endif diff --git a/ipc/shm.c b/ipc/shm.c index 7e199fa..7b87bea 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -19,6 +19,9 @@ * namespaces support * OpenVZ, SWsoft Inc. * Pavel Emelianov + * + * Better ipc lock (kern_ipc_perm.lock) handling + * Davidlohr Bueso , June 2013. */ #include @@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns) } /* - * Called with shm_ids.rw_mutex (writer) and the shp structure locked. - * Only shm_ids.rw_mutex remains locked on exit. + * Called with shm_ids.rwsem (writer) and the shp structure locked. + * Only shm_ids.rwsem remains locked on exit. */ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { @@ -124,8 +127,28 @@ void __init shm_init (void) IPC_SHM_IDS, sysvipc_shm_proc_show); } +static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct shmid_kernel, shm_perm); +} + +static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct shmid_kernel, shm_perm); +} + /* - * shm_lock_(check_) routines are called in the paths where the rw_mutex + * shm_lock_(check_) routines are called in the paths where the rwsem * is not necessarily held. */ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) @@ -141,18 +164,16 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) { rcu_read_lock(); - spin_lock(&ipcp->shm_perm.lock); + ipc_lock_object(&ipcp->shm_perm); } -static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, - int id) +static void shm_rcu_free(struct rcu_head *head) { - struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct shmid_kernel *shp = ipc_rcu_to_struct(p); - return container_of(ipcp, struct shmid_kernel, shm_perm); + security_shm_free(shp); + ipc_rcu_free(head); } static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) @@ -182,7 +203,7 @@ static void shm_open(struct vm_area_struct *vma) * @ns: namespace * @shp: struct to free * - * It has to be called with shp and shm_ids.rw_mutex (writer) locked, + * It has to be called with shp and shm_ids.rwsem (writer) locked, * but returns with shp unlocked and freed. */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) @@ -196,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) user_shm_unlock(file_inode(shp->shm_file)->i_size, shp->mlock_user); fput (shp->shm_file); - security_shm_free(shp); - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, shm_rcu_free); } /* @@ -230,7 +250,7 @@ static void shm_close(struct vm_area_struct *vma) struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); BUG_ON(IS_ERR(shp)); @@ -241,10 +261,10 @@ static void shm_close(struct vm_area_struct *vma) shm_destroy(ns, shp); else shm_unlock(shp); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } -/* Called with ns->shm_ids(ns).rw_mutex locked */ +/* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_current(int id, void *p, void *data) { struct ipc_namespace *ns = data; @@ -275,7 +295,7 @@ static int shm_try_destroy_current(int id, void *p, void *data) return 0; } -/* Called with ns->shm_ids(ns).rw_mutex locked */ +/* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { struct ipc_namespace *ns = data; @@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * We want to destroy segments without users and with already * exit'ed originating process. * - * As shp->* are changed under rw_mutex, it's safe to skip shp locking. + * As shp->* are changed under rwsem, it's safe to skip shp locking. */ if (shp->shm_creator != NULL) return 0; @@ -300,10 +320,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } @@ -315,10 +335,10 @@ void exit_shm(struct task_struct *task) return; /* Destroy all already created segments, but not mapped yet */ - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); } static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -452,7 +472,7 @@ static const struct vm_operations_struct shm_vm_ops = { * @ns: namespace * @params: ptr to the structure that contains key, size and shmflg * - * Called with shm_ids.rw_mutex held as a writer. + * Called with shm_ids.rwsem held as a writer. */ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) @@ -485,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_perm.security = NULL; error = security_shm_alloc(shp); if (error) { - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, ipc_rcu_free); return error; } @@ -535,6 +555,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. @@ -543,7 +564,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ns->shm_tot += numpages; error = shp->shm_perm.id; - shm_unlock(shp); + + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); return error; no_id: @@ -551,13 +574,12 @@ no_id: user_shm_unlock(size, shp->mlock_user); fput(file); no_file: - security_shm_free(shp); - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, shm_rcu_free); return error; } /* - * Called with shm_ids.rw_mutex and ipcp locked. + * Called with shm_ids.rwsem and ipcp locked. */ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) { @@ -568,7 +590,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) } /* - * Called with shm_ids.rw_mutex and ipcp locked. + * Called with shm_ids.rwsem and ipcp locked. */ static inline int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) @@ -681,7 +703,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf /* * Calculate and add used RSS and swap pages of a shm. - * Called with shm_ids.rw_mutex held as a reader + * Called with shm_ids.rwsem held as a reader */ static void shm_add_rss_swap(struct shmid_kernel *shp, unsigned long *rss_add, unsigned long *swp_add) @@ -708,7 +730,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, } /* - * Called with shm_ids.rw_mutex held as a reader + * Called with shm_ids.rwsem held as a reader */ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, unsigned long *swp) @@ -737,9 +759,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, } /* - * This function handles some shmctl commands which require the rw_mutex + * This function handles some shmctl commands which require the rwsem * to be held in write mode. - * NOTE: no locks must be held, the rw_mutex is taken inside this function. + * NOTE: no locks must be held, the rwsem is taken inside this function. */ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid_ds __user *buf, int version) @@ -754,59 +776,67 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, return -EFAULT; } - ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd, - &shmid64.shm_perm, 0); - if (IS_ERR(ipcp)) - return PTR_ERR(ipcp); + down_write(&shm_ids(ns).rwsem); + rcu_read_lock(); + + ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, + &shmid64.shm_perm, 0); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } shp = container_of(ipcp, struct shmid_kernel, shm_perm); err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock; + goto out_unlock1; + switch (cmd) { case IPC_RMID: + ipc_lock_object(&shp->shm_perm); + /* do_shm_rmid unlocks the ipc object and rcu */ do_shm_rmid(ns, ipcp); goto out_up; case IPC_SET: + ipc_lock_object(&shp->shm_perm); err = ipc_update_perm(&shmid64.shm_perm, ipcp); if (err) - goto out_unlock; + goto out_unlock0; shp->shm_ctim = get_seconds(); break; default: err = -EINVAL; + goto out_unlock1; } -out_unlock: - shm_unlock(shp); + +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); out_up: - up_write(&shm_ids(ns).rw_mutex); + up_write(&shm_ids(ns).rwsem); return err; } -SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +static int shmctl_nolock(struct ipc_namespace *ns, int shmid, + int cmd, int version, void __user *buf) { + int err; struct shmid_kernel *shp; - int err, version; - struct ipc_namespace *ns; - if (cmd < 0 || shmid < 0) { - err = -EINVAL; - goto out; + /* preliminary security checks for *_INFO */ + if (cmd == IPC_INFO || cmd == SHM_INFO) { + err = security_shm_shmctl(NULL, cmd); + if (err) + return err; } - version = ipc_parse_version(&cmd); - ns = current->nsproxy->ipc_ns; - - switch (cmd) { /* replace with proc interface ? */ + switch (cmd) { case IPC_INFO: { struct shminfo64 shminfo; - err = security_shm_shmctl(NULL, cmd); - if (err) - return err; - memset(&shminfo, 0, sizeof(shminfo)); shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; shminfo.shmmax = ns->shm_ctlmax; @@ -816,9 +846,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) if(copy_shminfo_to_user (buf, &shminfo, version)) return -EFAULT; - down_read(&shm_ids(ns).rw_mutex); + down_read(&shm_ids(ns).rwsem); err = ipc_get_maxid(&shm_ids(ns)); - up_read(&shm_ids(ns).rw_mutex); + up_read(&shm_ids(ns).rwsem); if(err<0) err = 0; @@ -828,19 +858,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { struct shm_info shm_info; - err = security_shm_shmctl(NULL, cmd); - if (err) - return err; - memset(&shm_info, 0, sizeof(shm_info)); - down_read(&shm_ids(ns).rw_mutex); + down_read(&shm_ids(ns).rwsem); shm_info.used_ids = shm_ids(ns).in_use; shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); shm_info.shm_tot = ns->shm_tot; shm_info.swap_attempts = 0; shm_info.swap_successes = 0; err = ipc_get_maxid(&shm_ids(ns)); - up_read(&shm_ids(ns).rw_mutex); + up_read(&shm_ids(ns).rwsem); if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { err = -EFAULT; goto out; @@ -855,27 +881,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) struct shmid64_ds tbuf; int result; + rcu_read_lock(); if (cmd == SHM_STAT) { - shp = shm_lock(ns, shmid); + shp = shm_obtain_object(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock; } result = shp->shm_perm.id; } else { - shp = shm_lock_check(ns, shmid); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock; } result = 0; } + err = -EACCES; if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; + err = security_shm_shmctl(shp, cmd); if (err) goto out_unlock; + memset(&tbuf, 0, sizeof(tbuf)); kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); tbuf.shm_segsz = shp->shm_segsz; @@ -885,43 +915,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; tbuf.shm_nattch = shp->shm_nattch; - shm_unlock(shp); - if(copy_shmid_to_user (buf, &tbuf, version)) + rcu_read_unlock(); + + if (copy_shmid_to_user(buf, &tbuf, version)) err = -EFAULT; else err = result; goto out; } + default: + return -EINVAL; + } + +out_unlock: + rcu_read_unlock(); +out: + return err; +} + +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + struct shmid_kernel *shp; + int err, version; + struct ipc_namespace *ns; + + if (cmd < 0 || shmid < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case SHM_INFO: + case SHM_STAT: + case IPC_STAT: + return shmctl_nolock(ns, shmid, cmd, version, buf); + case IPC_RMID: + case IPC_SET: + return shmctl_down(ns, shmid, cmd, buf, version); case SHM_LOCK: case SHM_UNLOCK: { struct file *shm_file; - shp = shm_lock_check(ns, shmid); + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock1; } audit_ipc_obj(&(shp->shm_perm)); + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock1; + ipc_lock_object(&shp->shm_perm); if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); err = -EPERM; if (!uid_eq(euid, shp->shm_perm.uid) && !uid_eq(euid, shp->shm_perm.cuid)) - goto out_unlock; + goto out_unlock0; if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) - goto out_unlock; + goto out_unlock0; } - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock; - shm_file = shp->shm_file; if (is_file_hugepages(shm_file)) - goto out_unlock; + goto out_unlock0; if (cmd == SHM_LOCK) { struct user_struct *user = current_user(); @@ -930,32 +993,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) shp->shm_perm.mode |= SHM_LOCKED; shp->mlock_user = user; } - goto out_unlock; + goto out_unlock0; } /* SHM_UNLOCK */ if (!(shp->shm_perm.mode & SHM_LOCKED)) - goto out_unlock; + goto out_unlock0; shmem_lock(shm_file, 0, shp->mlock_user); shp->shm_perm.mode &= ~SHM_LOCKED; shp->mlock_user = NULL; get_file(shm_file); - shm_unlock(shp); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); shmem_unlock_mapping(shm_file->f_mapping); + fput(shm_file); - goto out; - } - case IPC_RMID: - case IPC_SET: - err = shmctl_down(ns, shmid, cmd, buf, version); return err; + } default: return -EINVAL; } -out_unlock: - shm_unlock(shp); -out: +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); return err; } @@ -1023,10 +1085,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, * additional creator id... */ ns = current->nsproxy->ipc_ns; - shp = shm_lock_check(ns, shmid); + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); - goto out; + goto out_unlock; } err = -EACCES; @@ -1037,24 +1100,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, if (err) goto out_unlock; + ipc_lock_object(&shp->shm_perm); path = shp->shm_file->f_path; path_get(&path); shp->shm_nattch++; size = i_size_read(path.dentry->d_inode); - shm_unlock(shp); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); err = -ENOMEM; sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); - if (!sfd) - goto out_put_dentry; + if (!sfd) { + path_put(&path); + goto out_nattch; + } file = alloc_file(&path, f_mode, is_file_hugepages(shp->shm_file) ? &shm_file_operations_huge : &shm_file_operations); err = PTR_ERR(file); - if (IS_ERR(file)) - goto out_free; + if (IS_ERR(file)) { + kfree(sfd); + path_put(&path); + goto out_nattch; + } file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; @@ -1080,7 +1150,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, addr > current->mm->start_stack - size - PAGE_SIZE * 5) goto invalid; } - + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); *raddr = addr; err = 0; @@ -1095,7 +1165,7 @@ out_fput: fput(file); out_nattch: - down_write(&shm_ids(ns).rw_mutex); + down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; @@ -1103,20 +1173,13 @@ out_nattch: shm_destroy(ns, shp); else shm_unlock(shp); - up_write(&shm_ids(ns).rw_mutex); - -out: + up_write(&shm_ids(ns).rwsem); return err; out_unlock: - shm_unlock(shp); - goto out; - -out_free: - kfree(sfd); -out_put_dentry: - path_put(&path); - goto out_nattch; + rcu_read_unlock(); +out: + return err; } SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) @@ -1221,8 +1284,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) #else /* CONFIG_MMU */ /* under NOMMU conditions, the exact address to be destroyed must be * given */ - retval = -EINVAL; - if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { + if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); retval = 0; } diff --git a/ipc/util.c b/ipc/util.c index 809ec5e..fdb8ae7 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -15,6 +15,14 @@ * Jun 2006 - namespaces ssupport * OpenVZ, SWsoft Inc. * Pavel Emelianov + * + * General sysv ipc locking scheme: + * when doing ipc id lookups, take the ids->rwsem + * rcu_read_lock() + * obtain the ipc object (kern_ipc_perm) + * perform security, capabilities, auditing and permission checks, etc. + * acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object() + * perform data updates (ie: SET, RMID, LOCK/UNLOCK commands) */ #include @@ -119,7 +127,7 @@ __initcall(ipc_init); void ipc_init_ids(struct ipc_ids *ids) { - init_rwsem(&ids->rw_mutex); + init_rwsem(&ids->rwsem); ids->in_use = 0; ids->seq = 0; @@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, * @ids: Identifier set * @key: The key to find * - * Requires ipc_ids.rw_mutex locked. + * Requires ipc_ids.rwsem locked. * Returns the LOCKED pointer to the ipc structure if found or NULL * if not. * If key is found ipc points to the owning ipc structure @@ -197,7 +205,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) continue; } - ipc_lock_by_ptr(ipc); + rcu_read_lock(); + ipc_lock_object(ipc); return ipc; } @@ -208,7 +217,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) * ipc_get_maxid - get the last assigned id * @ids: IPC identifier set * - * Called with ipc_ids.rw_mutex held. + * Called with ipc_ids.rwsem held. */ int ipc_get_maxid(struct ipc_ids *ids) @@ -246,9 +255,8 @@ int ipc_get_maxid(struct ipc_ids *ids) * is returned. The 'new' entry is returned in a locked state on success. * On failure the entry is not locked and a negative err-code is returned. * - * Called with ipc_ids.rw_mutex held as a writer. + * Called with writer ipc_ids.rwsem held. */ - int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { kuid_t euid; @@ -313,9 +321,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, { int err; - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); err = ops->getnew(ns, params); - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); return err; } @@ -332,7 +340,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, * * On success, the IPC id is returned. * - * It is called with ipc_ids.rw_mutex and ipcp->lock held. + * It is called with ipc_ids.rwsem and ipcp->lock held. */ static int ipc_check_perms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, @@ -377,7 +385,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, * Take the lock as a writer since we are potentially going to add * a new entry + read locks are not "upgradable" */ - down_write(&ids->rw_mutex); + down_write(&ids->rwsem); ipcp = ipc_findkey(ids, params->key); if (ipcp == NULL) { /* key not used */ @@ -403,7 +411,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, } ipc_unlock(ipcp); } - up_write(&ids->rw_mutex); + up_write(&ids->rwsem); return err; } @@ -414,7 +422,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, * @ids: IPC identifier set * @ipcp: ipc perm structure containing the identifier to remove * - * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held * before this function is called, and remain locked on the exit. */ @@ -466,13 +474,6 @@ void ipc_free(void* ptr, int size) kfree(ptr); } -struct ipc_rcu { - struct rcu_head rcu; - atomic_t refcount; - /* "void *" makes sure alignment of following data is sane. */ - void *data[0]; -}; - /** * ipc_rcu_alloc - allocate ipc and rcu space * @size: size desired @@ -489,35 +490,34 @@ void *ipc_rcu_alloc(int size) if (unlikely(!out)) return NULL; atomic_set(&out->refcount, 1); - return out->data; + return out + 1; } int ipc_rcu_getref(void *ptr) { - return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); -} + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; -/** - * ipc_schedule_free - free ipc + rcu space - * @head: RCU callback structure for queued work - */ -static void ipc_schedule_free(struct rcu_head *head) -{ - vfree(container_of(head, struct ipc_rcu, rcu)); + return atomic_inc_not_zero(&p->refcount); } -void ipc_rcu_putref(void *ptr) +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) { - struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; if (!atomic_dec_and_test(&p->refcount)) return; - if (is_vmalloc_addr(ptr)) { - call_rcu(&p->rcu, ipc_schedule_free); - } else { - kfree_rcu(p, rcu); - } + call_rcu(&p->rcu, func); +} + +void ipc_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); } /** @@ -622,7 +622,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) } /** - * ipc_lock - Lock an ipc structure without rw_mutex held + * ipc_lock - Lock an ipc structure without rwsem held * @ids: IPC identifier set * @id: ipc id to look for * @@ -678,22 +678,6 @@ out: return out; } -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) -{ - struct kern_ipc_perm *out; - - out = ipc_lock(ids, id); - if (IS_ERR(out)) - return out; - - if (ipc_checkid(out, id)) { - ipc_unlock(out); - return ERR_PTR(-EIDRM); - } - - return out; -} - /** * ipcget - Common sys_*get() code * @ns : namsepace @@ -734,7 +718,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) } /** - * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd + * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd * @ns: the ipc namespace * @ids: the table of ids where to look for the ipc * @id: the id of the ipc to retrieve @@ -747,39 +731,22 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * It must be called without any lock held and * - retrieves the ipc with the given id in the given table. * - performs some audit and permission check, depending on the given cmd - * - returns the ipc with both ipc and rw_mutex locks held in case of success - * or an err-code without any lock held otherwise. + * - returns a pointer to the ipc object or otherwise, the corresponding error. + * + * Call holding the both the rwsem and the rcu read lock. */ -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm) -{ - struct kern_ipc_perm *ipcp; - - ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); - if (IS_ERR(ipcp)) - goto out; - - spin_lock(&ipcp->lock); -out: - return ipcp; -} - struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm) + struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm) { kuid_t euid; int err = -EPERM; struct kern_ipc_perm *ipcp; - down_write(&ids->rw_mutex); - rcu_read_lock(); - ipcp = ipc_obtain_object_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); - goto out_up; + goto err; } audit_ipc_obj(ipcp); @@ -790,16 +757,8 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, euid = current_euid(); if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ipcp; - -out_up: - /* - * Unsuccessful lookup, unlock and return - * the corresponding error. - */ - rcu_read_unlock(); - up_write(&ids->rw_mutex); - + return ipcp; /* successful lookup */ +err: return ERR_PTR(err); } @@ -856,7 +815,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { *new_pos = pos + 1; - ipc_lock_by_ptr(ipc); + rcu_read_lock(); + ipc_lock_object(ipc); return ipc; } } @@ -894,7 +854,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) * Take the lock - this will be released by the corresponding * call to stop(). */ - down_read(&ids->rw_mutex); + down_read(&ids->rwsem); /* pos < 0 is invalid */ if (*pos < 0) @@ -921,7 +881,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) ids = &iter->ns->ids[iface->ids]; /* Release the lock we took in start() */ - up_read(&ids->rw_mutex); + up_read(&ids->rwsem); } static int sysvipc_proc_show(struct seq_file *s, void *it) diff --git a/ipc/util.h b/ipc/util.h index 2b0bdd5..f2f5036 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { } static inline void shm_exit_ns(struct ipc_namespace *ns) { } #endif +struct ipc_rcu { + struct rcu_head rcu; + atomic_t refcount; +} ____cacheline_aligned_in_smp; + +#define ipc_rcu_to_struct(p) ((void *)(p+1)) + /* * Structure that holds the parameters needed by the ipc operations * (see after) @@ -94,10 +101,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header, #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) -/* must be called with ids->rw_mutex acquired for writing */ +/* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); -/* must be called with ids->rw_mutex acquired for reading */ +/* must be called with ids->rwsem acquired for reading */ int ipc_get_maxid(struct ipc_ids *); /* must be called with both locks acquired. */ @@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); */ void* ipc_rcu_alloc(int size); int ipc_rcu_getref(void *ptr); -void ipc_rcu_putref(void *ptr); +void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); +void ipc_rcu_free(struct rcu_head *head); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); @@ -131,9 +139,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); -struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, - struct ipc_ids *ids, int id, int cmd, - struct ipc64_perm *perm, int extra_perm); #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION /* On IA-64, we always use the "64-bit version" of the IPC structures. */ @@ -159,24 +164,27 @@ static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) return uid / SEQ_MULTIPLIER != ipcp->seq; } -static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +static inline void ipc_lock_object(struct kern_ipc_perm *perm) { - rcu_read_lock(); spin_lock(&perm->lock); } -static inline void ipc_unlock(struct kern_ipc_perm *perm) +static inline void ipc_unlock_object(struct kern_ipc_perm *perm) { spin_unlock(&perm->lock); - rcu_read_unlock(); } -static inline void ipc_lock_object(struct kern_ipc_perm *perm) +static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) { - spin_lock(&perm->lock); + assert_spin_locked(&perm->lock); +} + +static inline void ipc_unlock(struct kern_ipc_perm *perm) +{ + ipc_unlock_object(perm); + rcu_read_unlock(); } -struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params); diff --git a/mm/shmem.c b/mm/shmem.c index 5e6a842..509b393 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2879,14 +2879,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); /* common code */ -static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = shmem_dname + .d_dname = simple_dname }; /** diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b5375ed..aecf088 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -930,6 +930,14 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, } /* + * always configure channel mapping, it may have been changed by the + * user in the meantime + */ + hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, + channels, per_pin->chmap, + per_pin->chmap_set); + + /* * sizeof(ai) is used instead of sizeof(*hdmi_ai) or * sizeof(*dp_ai) to avoid partial match/update problems when * the user switches between HDMI/DP monitors. @@ -940,20 +948,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, "pin=%d channels=%d\n", pin_nid, channels); - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, - channels, per_pin->chmap, - per_pin->chmap_set); hdmi_stop_infoframe_trans(codec, pin_nid); hdmi_fill_audio_infoframe(codec, pin_nid, ai.bytes, sizeof(ai)); hdmi_start_infoframe_trans(codec, pin_nid); - } else { - /* For non-pcm audio switch, setup new channel mapping - * accordingly */ - if (per_pin->non_pcm != non_pcm) - hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca, - channels, per_pin->chmap, - per_pin->chmap_set); } per_pin->non_pcm = non_pcm; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 458cf89..21b6649 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3200,6 +3200,15 @@ static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec, } } +static void alc290_fixup_mono_speakers(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + /* Remove DAC node 0x03, as it seems to be + giving mono output */ + snd_hda_override_wcaps(codec, 0x03, 0); +} + enum { ALC269_FIXUP_SONY_VAIO, ALC275_FIXUP_SONY_VAIO_GPIO2, @@ -3223,9 +3232,12 @@ enum { ALC269_FIXUP_HP_GPIO_LED, ALC269_FIXUP_INV_DMIC, ALC269_FIXUP_LENOVO_DOCK, + ALC286_FIXUP_SONY_MIC_NO_PRESENCE, ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, + ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, + ALC290_FIXUP_MONO_SPEAKERS, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC269_FIXUP_ASUS_X101_FUNC, @@ -3412,6 +3424,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC269_FIXUP_DELL3_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, [ALC269_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, @@ -3420,6 +3441,13 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + }, [ALC269_FIXUP_ASUS_X101_FUNC] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_x101_headset_mic, @@ -3477,6 +3505,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_limit_int_mic_boost, }, + [ALC290_FIXUP_MONO_SPEAKERS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc290_fixup_mono_speakers, + .chained = true, + .chain_id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -3511,6 +3545,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), SND_PCI_QUIRK(0x103c, 0x18e6, "HP", ALC269_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -3529,6 +3564,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8516, "ASUS X101CH", ALC269_FIXUP_ASUS_X101), + SND_PCI_QUIRK(0x104d, 0x90b6, "Sony VAIO Pro 13", ALC286_FIXUP_SONY_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIXUP_SONY_VAIO_GPIO2), SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), @@ -4216,6 +4252,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index 0ce90337..cd69a80 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -299,19 +299,6 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y, usX2Y_clients_stop(usX2Y); } -static void usX2Y_error_sequence(struct usX2Ydev *usX2Y, - struct snd_usX2Y_substream *subs, struct urb *urb) -{ - snd_printk(KERN_ERR -"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n" -"Most probably some urb of usb-frame %i is still missing.\n" -"Cause could be too long delays in usb-hcd interrupt handling.\n", - usb_get_current_frame_number(usX2Y->dev), - subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out", - usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame); - usX2Y_clients_stop(usX2Y); -} - static void i_usX2Y_urb_complete(struct urb *urb) { struct snd_usX2Y_substream *subs = urb->context; @@ -328,12 +315,9 @@ static void i_usX2Y_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) - subs->completed_urb = urb; - else { - usX2Y_error_sequence(usX2Y, subs, urb); - return; - } + + subs->completed_urb = urb; + { struct snd_usX2Y_substream *capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE], *playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK]; diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index f2a1acd..814d0e8 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -244,13 +244,8 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb) usX2Y_error_urb_status(usX2Y, subs, urb); return; } - if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF))) - subs->completed_urb = urb; - else { - usX2Y_error_sequence(usX2Y, subs, urb); - return; - } + subs->completed_urb = urb; capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE]; capsubs2 = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE + 2]; playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK];