diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ec6a939..eea7102 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -502,23 +502,31 @@ prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); - int (*page_mkwrite)(struct vm_area_struct *, struct page *); + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: BKL mmap_sem PageLocked(page) open: no yes close: no yes -fault: no yes -page_mkwrite: no yes no +fault: no yes can return with page locked +page_mkwrite: no yes can return with page locked access: no yes - ->page_mkwrite() is called when a previously read-only page is -about to become writeable. The file system is responsible for -protecting against truncate races. Once appropriate action has been -taking to lock out truncate, the page range should be verified to be -within i_size. The page mapping should also be checked that it is not -NULL. + ->fault() is called when a previously not present pte is about +to be faulted in. The filesystem must find and return the page associated +with the passed in "pgoff" in the vm_fault structure. If it is possible that +the page may be truncated and/or invalidated, then the filesystem must lock +the page, then ensure it is not already truncated (the page lock will block +subsequent truncate), and then return with VM_FAULT_LOCKED, and the page +locked. The VM will unlock the page. + + ->page_mkwrite() is called when a previously read-only pte is +about to become writeable. The filesystem again must ensure that there are +no truncate/invalidate races, and then return with the page locked. If +the page has been truncated, the filesystem should not look up a new page +like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which +will cause the VM to retry the fault. ->access() is called when get_user_pages() fails in acces_process_vm(), typically used to debug a process through diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index da56821..dd8eeea 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -411,7 +411,6 @@ static __init int svm_hardware_setup(void) iopm_va = page_address(iopm_pages); memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); - clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; if (boot_cpu_has(X86_FEATURE_NX)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 425423e..92f0457 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1075,9 +1075,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int is_efer_nx(void) { - u64 efer; + unsigned long long efer = 0; - rdmsrl(MSR_EFER, efer); + rdmsrl_safe(MSR_EFER, &efer); return efer & EFER_NX; } diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index e190d8b..7ffc5ac 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -410,9 +410,7 @@ static int __init dmatest_init(void) chan = dma_request_channel(mask, filter, NULL); if (chan) { err = dmatest_add_channel(chan); - if (err == 0) - continue; - else { + if (err) { dma_release_channel(chan); break; /* add_channel failed, punt */ } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8851197..700ebec 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -110,6 +110,11 @@ #define USB_VENDOR_ID_BERKSHIRE 0x0c98 #define USB_DEVICE_ID_BERKSHIRE_PCWD 0x1140 +#define USB_VENDOR_ID_CH 0x068e +#define USB_DEVICE_ID_CH_PRO_PEDALS 0x00f2 +#define USB_DEVICE_ID_CH_COMBATSTICK 0x00f4 +#define USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE 0x00ff + #define USB_VENDOR_ID_CHERRY 0x046a #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 4391717..d8f7423 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -50,6 +50,9 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index dbfb30c..0bdab95 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -1462,7 +1462,8 @@ static struct w83781d_data *w83781d_update_device(struct device *dev) data->pwm[i] = w83781d_read_value(data, W83781D_REG_PWM[i]); - if ((data->type != w83782d || !client->driver) + /* Only W83782D on SMBus has PWM3 and PWM4 */ + if ((data->type != w83782d || !client) && i == 1) break; } diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index eb8f72c..0e034a4 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -104,7 +104,7 @@ static int sclhi(struct i2c_algo_bit_data *adap) * chips may hold it low ("clock stretching") while they * are processing data internally. */ - if (time_after_eq(jiffies, start + adap->timeout)) + if (time_after(jiffies, start + adap->timeout)) return -ETIMEDOUT; cond_resched(); } diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index d50b329..2346a89 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -270,10 +270,21 @@ static int pca_xfer(struct i2c_adapter *i2c_adap, case 0x30: /* Data byte in I2CDAT has been transmitted; NOT ACK has been received */ DEB2("NOT ACK received after data byte\n"); + pca_stop(adap); goto out; case 0x38: /* Arbitration lost during SLA+W, SLA+R or data bytes */ DEB2("Arbitration lost\n"); + /* + * The PCA9564 data sheet (2006-09-01) says "A + * START condition will be transmitted when the + * bus becomes free (STOP or SCL and SDA high)" + * when the STA bit is set (p. 11). + * + * In case this won't work, try pca_reset() + * instead. + */ + pca_start(adap); goto out; case 0x58: /* Data byte has been received; NOT ACK has been returned */ diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7199437..b411249 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -985,6 +985,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) oldindex = index; oldpage = page; + bitmap->filemap[bitmap->file_pages++] = page; + bitmap->last_page_size = count; + if (outofdate) { /* * if bitmap is out of date, dirty the @@ -997,15 +1000,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) write_page(bitmap, page, 1); ret = -EIO; - if (bitmap->flags & BITMAP_WRITE_ERROR) { - /* release, page not in filemap yet */ - put_page(page); + if (bitmap->flags & BITMAP_WRITE_ERROR) goto err; - } } - - bitmap->filemap[bitmap->file_pages++] = page; - bitmap->last_page_size = count; } paddr = kmap_atomic(page, KM_USER0); if (bitmap->flags & BITMAP_HOSTENDIAN) @@ -1015,9 +1012,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) kunmap_atomic(paddr, KM_USER0); if (b) { /* if the disk bit is set, set the memory bit */ - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) - ); + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) + >= start); + bitmap_set_memory_bits(bitmap, + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), + needed); bit_cnt++; set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } @@ -1153,8 +1152,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) spin_lock_irqsave(&bitmap->lock, flags); clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), - &blocks, 0); + bmc = bitmap_get_counter(bitmap, + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), + &blocks, 0); if (bmc) { /* if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); @@ -1168,7 +1168,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) } else if (*bmc == 1) { /* we can clear the bit */ *bmc = 0; - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), + bitmap_count_page(bitmap, + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), -1); /* clear the bit */ @@ -1484,7 +1485,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) unsigned long chunk; for (chunk = s; chunk <= e; chunk++) { - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); bitmap_set_memory_bits(bitmap, sec, 1); bitmap_file_set_bit(bitmap, sec); } diff --git a/drivers/md/md.c b/drivers/md/md.c index a307f87..dc85211 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2844,11 +2844,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) } else err = -EBUSY; spin_unlock_irq(&mddev->write_lock); - } else { - mddev->ro = 0; - mddev->recovery_cp = MaxSector; - err = do_md_run(mddev); - } + } else + err = -EINVAL; break; case active: if (mddev->pers) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7301631..d849533 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1807,17 +1807,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r10_bio->sector = sect; raid10_find_phys(conf, r10_bio); - /* Need to check if this section will still be + + /* Need to check if the array will still be * degraded */ - for (j=0; jcopies;j++) { - int d = r10_bio->devs[j].devnum; - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { + for (j=0; jraid_disks; j++) + if (conf->mirrors[j].rdev == NULL || + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { still_degraded = 1; break; } - } + must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 6bd63cc..d436e27 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3712,7 +3712,7 @@ static irqreturn_t e1000_intr(int irq, void *data) struct e1000_hw *hw = &adapter->hw; u32 rctl, icr = er32(ICR); - if (unlikely((!icr) || test_bit(__E1000_RESETTING, &adapter->flags))) + if (unlikely((!icr) || test_bit(__E1000_DOWN, &adapter->flags))) return IRQ_NONE; /* Not our interrupt */ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index dfe9226..9a59414 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -529,14 +529,17 @@ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, x &= (arr_len - 1); pref = skb_array[x]; - prefetchw(pref); - prefetchw(pref + EHEA_CACHE_LINE); - - pref = (skb_array[x]->data); - prefetch(pref); - prefetch(pref + EHEA_CACHE_LINE); - prefetch(pref + EHEA_CACHE_LINE * 2); - prefetch(pref + EHEA_CACHE_LINE * 3); + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + + pref = (skb_array[x]->data); + prefetch(pref); + prefetch(pref + EHEA_CACHE_LINE); + prefetch(pref + EHEA_CACHE_LINE * 2); + prefetch(pref + EHEA_CACHE_LINE * 3); + } + skb = skb_array[skb_index]; skb_array[skb_index] = NULL; return skb; @@ -553,12 +556,14 @@ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array, x &= (arr_len - 1); pref = skb_array[x]; - prefetchw(pref); - prefetchw(pref + EHEA_CACHE_LINE); + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); - pref = (skb_array[x]->data); - prefetchw(pref); - prefetchw(pref + EHEA_CACHE_LINE); + pref = (skb_array[x]->data); + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + } skb = skb_array[wqe_index]; skb_array[wqe_index] = NULL; diff --git a/drivers/net/ne2k-pci.c b/drivers/net/ne2k-pci.c index f090d3b..453d6bb 100644 --- a/drivers/net/ne2k-pci.c +++ b/drivers/net/ne2k-pci.c @@ -373,18 +373,17 @@ static int __devinit ne2k_pci_init_one (struct pci_dev *pdev, dev->ethtool_ops = &ne2k_pci_ethtool_ops; NS8390_init(dev, 0); + memcpy(dev->dev_addr, SA_prom, 6); + memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); + i = register_netdev(dev); if (i) goto err_out_free_netdev; - for(i = 0; i < 6; i++) - dev->dev_addr[i] = SA_prom[i]; printk("%s: %s found at %#lx, IRQ %d, %pM.\n", dev->name, pci_clone_list[chip_idx].name, ioaddr, dev->irq, dev->dev_addr); - memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - return 0; err_out_free_netdev: diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 0c3a2ab..28d2c8d 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -522,7 +522,7 @@ mpc52xx_uart_startup(struct uart_port *port) /* Request IRQ */ ret = request_irq(port->irq, mpc52xx_uart_int, - IRQF_DISABLED | IRQF_SAMPLE_RANDOM | IRQF_SHARED, + IRQF_DISABLED | IRQF_SAMPLE_RANDOM, "mpc52xx_psc_uart", port); if (ret) return ret; diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c index 4154be3..58c4d37 100644 --- a/drivers/usb/gadget/usbstring.c +++ b/drivers/usb/gadget/usbstring.c @@ -38,7 +38,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) uchar = (c & 0x1f) << 6; c = (u8) *s++; - if ((c & 0xc0) != 0xc0) + if ((c & 0xc0) != 0x80) goto fail; c &= 0x3f; uchar |= c; @@ -49,13 +49,13 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) uchar = (c & 0x0f) << 12; c = (u8) *s++; - if ((c & 0xc0) != 0xc0) + if ((c & 0xc0) != 0x80) goto fail; c &= 0x3f; uchar |= c << 6; c = (u8) *s++; - if ((c & 0xc0) != 0xc0) + if ((c & 0xc0) != 0x80) goto fail; c &= 0x3f; uchar |= c; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index bb3143e..5daa517 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -56,6 +56,7 @@ static __u16 vendor = FTDI_VID; static __u16 product; struct ftdi_private { + struct kref kref; ftdi_chip_type_t chip_type; /* type of device, either SIO or FT8U232AM */ int baud_base; /* baud base clock for divisor setting */ @@ -1352,6 +1353,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) return -ENOMEM; } + kref_init(&priv->kref); spin_lock_init(&priv->rx_lock); spin_lock_init(&priv->tx_lock); init_waitqueue_head(&priv->delta_msr_wait); @@ -1468,6 +1470,13 @@ static void ftdi_shutdown(struct usb_serial *serial) dbg("%s", __func__); } +static void ftdi_sio_priv_release(struct kref *k) +{ + struct ftdi_private *priv = container_of(k, struct ftdi_private, kref); + + kfree(priv); +} + static int ftdi_sio_port_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); @@ -1482,7 +1491,7 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port) if (priv) { usb_set_serial_port_data(port, NULL); - kfree(priv); + kref_put(&priv->kref, ftdi_sio_priv_release); } return 0; @@ -1547,7 +1556,8 @@ static int ftdi_open(struct tty_struct *tty, dev_err(&port->dev, "%s - failed submitting read urb, error %d\n", __func__, result); - + else + kref_get(&priv->kref); return result; } /* ftdi_open */ @@ -1589,11 +1599,11 @@ static void ftdi_close(struct tty_struct *tty, mutex_unlock(&port->serial->disc_mutex); /* cancel any scheduled reading */ - cancel_delayed_work(&priv->rx_work); - flush_scheduled_work(); + cancel_delayed_work_sync(&priv->rx_work); /* shutdown our bulk read */ usb_kill_urb(port->read_urb); + kref_put(&priv->kref, ftdi_sio_priv_release); } /* ftdi_close */ diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 0820265..0a7a667 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -85,8 +85,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); /* vm_ops->page_mkwrite handler */ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { + struct page *page = vmf->page; struct fb_info *info = vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; struct page *cur; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e1d4e3..7dd1b6d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2060,7 +2060,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948..17e608c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4292,8 +4292,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -4306,10 +4307,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) u64 page_end; ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); - if (ret) + if (ret) { + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else /* -ENOSPC, -EIO, etc */ + ret = VM_FAULT_SIGBUS; goto out; + } - ret = -EINVAL; + ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ again: lock_page(page); size = i_size_read(inode); diff --git a/fs/buffer.c b/fs/buffer.c index 891e1c7..4eb8992 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2465,20 +2465,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) * unlock the page. */ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = -EINVAL; + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ lock_page(page); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || (page_offset(page) > size)) { /* page got truncated out from underneath us */ - goto out_unlock; + unlock_page(page); + goto out; } /* page is wholly or partially inside EOF */ @@ -2491,8 +2493,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page, if (!ret) ret = block_commit_write(page, 0, end); -out_unlock: - unlock_page(page); + if (unlikely(ret)) { + unlock_page(page); + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else /* -ENOSPC, -EIO, etc */ + ret = VM_FAULT_SIGBUS; + } else + ret = VM_FAULT_LOCKED; + +out: return ret; } diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 14eb9a2..604ce8a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -64,6 +64,13 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); #endif /* + * To be safe - for UCS to UTF-8 with strings loaded with the rare long + * characters alloc more to account for such multibyte target UTF-8 + * characters. + */ +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2) + +/* * UniStrcat: Concatenate the second string to the first * * Returns: diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 71ae000..4fbb6b5 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -91,23 +91,22 @@ static int cifs_strncpy_to_host(char **dst, const char *src, const int maxlen, const bool is_unicode, const struct nls_table *nls_codepage) { - int plen; + int src_len, dst_len; if (is_unicode) { - plen = UniStrnlen((wchar_t *)src, maxlen); - *dst = kmalloc(plen + 2, GFP_KERNEL); + src_len = UniStrnlen((wchar_t *)src, maxlen); + *dst = kmalloc((4 * src_len) + 2, GFP_KERNEL); if (!*dst) goto cifs_strncpy_to_host_ErrExit; - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage); + dst_len = cifs_strfromUCS_le(*dst, (__le16 *)src, src_len, nls_codepage); + (*dst)[dst_len + 1] = 0; } else { - plen = strnlen(src, maxlen); - *dst = kmalloc(plen + 2, GFP_KERNEL); + src_len = strnlen(src, maxlen); + *dst = kmalloc(src_len + 1, GFP_KERNEL); if (!*dst) goto cifs_strncpy_to_host_ErrExit; - strncpy(*dst, src, plen); + strlcpy(*dst, src, src_len + 1); } - (*dst)[plen] = 0; - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */ return 0; cifs_strncpy_to_host_ErrExit: diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4b64f39..0344b26 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3667,16 +3667,12 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, BCC(smb_buffer_response)) { kfree(tcon->nativeFileSystem); tcon->nativeFileSystem = - kzalloc(2*(length + 1), GFP_KERNEL); + kzalloc((4 * length) + 2, GFP_KERNEL); if (tcon->nativeFileSystem) cifs_strfromUCS_le( tcon->nativeFileSystem, (__le16 *) bcc_ptr, length, nls_codepage); - bcc_ptr += 2 * length; - bcc_ptr[0] = 0; /* null terminate the string */ - bcc_ptr[1] = 0; - bcc_ptr += 2; } /* else do not bother copying these information fields*/ } else { diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4c89c57..b2990b1 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -691,14 +691,15 @@ cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, NLS_MAX_CHARSET_SIZE); if (len > 0) { j += len; - continue; + goto overrun_chk; } else { target[j] = '?'; } } j++; /* make sure we do not overrun callers allocated temp buffer */ - if (j >= (2 * NAME_MAX)) +overrun_chk: + if (j >= UNICODE_NAME_MAX) break; } cUCS_out: diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index c2c01ff..0bdd5a6 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -1072,7 +1072,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) with the rare long characters alloc more to account for such multibyte target UTF-8 characters. cifs_unicode.c, which actually does the conversion, has the same limit */ - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL); + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); for (i = 0; (i < num_to_fill) && (rc == 0); i++) { if (current_entry == NULL) { /* evaluate whether this case is an error */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5c68b42..23e8f99 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -111,7 +111,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses) get_vc_num_exit: write_unlock(&cifs_tcp_ses_lock); - return le16_to_cpu(vcnum); + return cpu_to_le16(vcnum); } static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) @@ -285,27 +285,26 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, int words_left, len; char *data = *pbcc_area; - - cFYI(1, ("bleft %d", bleft)); - - /* SMB header is unaligned, so cifs servers word align start of - Unicode strings */ - data++; - bleft--; /* Windows servers do not always double null terminate - their final Unicode string - in which case we - now will not attempt to decode the byte of junk - which follows it */ + /* + * Windows servers do not always double null terminate their final + * Unicode string. Check to see if there are an uneven number of bytes + * left. If so, then add an extra NULL pad byte to the end of the + * response. + * + * See section 2.7.2 in "Implementing CIFS" for details + */ + if (bleft % 2) { + data[bleft] = 0; + ++bleft; + } words_left = bleft / 2; /* save off server operating system */ len = UniStrnlen((wchar_t *) data, words_left); -/* We look for obvious messed up bcc or strings in response so we do not go off - the end since (at least) WIN2K and Windows XP have a major bug in not null - terminating last Unicode string in response */ if (len >= words_left) return rc; @@ -343,13 +342,10 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, return rc; kfree(ses->serverDomain); - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ - if (ses->serverDomain != NULL) { + ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL); + if (ses->serverDomain != NULL) cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, nls_cp); - ses->serverDomain[2*len] = 0; - ses->serverDomain[(2*len) + 1] = 0; - } data += 2 * (len + 1); words_left -= len + 1; @@ -702,12 +698,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, } /* BB check if Unicode and decode strings */ - if (smb_buf->Flags2 & SMBFLG2_UNICODE) + if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, - ses, nls_cp); - else + ses, nls_cp); + } else { rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); + } ssetup_exit: if (spnego_key) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 011b9b8..e323e47 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1136,7 +1136,7 @@ error_return: SYSCALL_DEFINE1(epoll_create, int, size) { - if (size < 0) + if (size <= 0) return -EINVAL; return sys_epoll_create1(0); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0c87dc..90909f9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1097,7 +1097,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7fed5b..2c0439d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5116,8 +5116,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; loff_t size; unsigned long len; int ret = -EINVAL; @@ -5169,6 +5170,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock; ret = 0; out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; up_read(&inode->i_alloc_sem); return ret; } diff --git a/fs/fcntl.c b/fs/fcntl.c index bd215cc..fc2aaa6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; + int retval = oldfd; + rcu_read_lock(); if (!fcheck_files(files, oldfd)) - oldfd = -EBADF; + retval = -EBADF; rcu_read_unlock(); - return oldfd; + return retval; } return sys_dup3(oldfd, newfd, 0); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 821d10f..4e340fe 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; /* * Don't use page->mapping as it may become NULL from a * concurrent truncate. diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 459b73d..75ca5ac 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -908,6 +908,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: + bdi_destroy(&fc->bdi); fuse_conn_put(fc); err_fput: fput(file); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 93fe41b..0093a33 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -336,8 +336,9 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -409,6 +410,10 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 240ec63..344d9f3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -258,7 +258,7 @@ int __generic_block_fiemap(struct inode *inode, long long length = 0, map_len = 0; u64 logical = 0, phys = 0, size = 0; u32 flags = FIEMAP_EXTENT_MERGED; - int ret = 0; + int ret = 0, past_eof = 0, whole_file = 0; if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) return ret; @@ -266,6 +266,9 @@ int __generic_block_fiemap(struct inode *inode, start_blk = logical_to_blk(inode, start); length = (long long)min_t(u64, len, i_size_read(inode)); + if (length < len) + whole_file = 1; + map_len = length; do { @@ -282,11 +285,26 @@ int __generic_block_fiemap(struct inode *inode, /* HOLE */ if (!buffer_mapped(&tmp)) { + length -= blk_to_logical(inode, 1); + start_blk++; + + /* + * we want to handle the case where there is an + * allocated block at the front of the file, and then + * nothing but holes up to the end of the file properly, + * to make sure that extent at the front gets properly + * marked with FIEMAP_EXTENT_LAST + */ + if (!past_eof && + blk_to_logical(inode, start_blk) >= + blk_to_logical(inode, 0)+i_size_read(inode)) + past_eof = 1; + /* * first hole after going past the EOF, this is our * last extent */ - if (length <= 0) { + if (past_eof && size) { flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, @@ -294,15 +312,37 @@ int __generic_block_fiemap(struct inode *inode, break; } - length -= blk_to_logical(inode, 1); - /* if we have holes up to/past EOF then we're done */ - if (length <= 0) + if (length <= 0 || past_eof) break; - - start_blk++; } else { - if (length <= 0 && size) { + /* + * we have gone over the length of what we wanted to + * map, and it wasn't the entire file, so add the extent + * we got last time and exit. + * + * This is for the case where say we want to map all the + * way up to the second to the last block in a file, but + * the last block is a hole, making the second to last + * block FIEMAP_EXTENT_LAST. In this case we want to + * see if there is a hole after the second to last block + * so we can mark it properly. If we found data after + * we exceeded the length we were requesting, then we + * are good to go, just add the extent to the fieinfo + * and break + */ + if (length <= 0 && !whole_file) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + break; + } + + /* + * if size != 0 then we know we already have an extent + * to add, so add it. + */ + if (size) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); @@ -319,19 +359,14 @@ int __generic_block_fiemap(struct inode *inode, start_blk += logical_to_blk(inode, size); /* - * if we are past the EOF we need to loop again to see - * if there is a hole so we can mark this extent as the - * last one, and if not keep mapping things until we - * find a hole, or we run out of slots in the extent - * array + * If we are past the EOF, then we need to make sure as + * soon as we find a hole that the last extent we found + * is marked with FIEMAP_EXTENT_LAST */ - if (length <= 0) - continue; - - ret = fiemap_fill_next_extent(fieinfo, logical, phys, - size, flags); - if (ret) - break; + if (!past_eof && + logical+size >= + blk_to_logical(inode, 0)+i_size_read(inode)) + past_eof = 1; } cond_resched(); } while (1); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 64f1c31..38af057 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -115,6 +115,16 @@ static void set_grace_period(void) schedule_delayed_work(&grace_period_end, grace_period); } +static void restart_grace(void) +{ + if (nlmsvc_ops) { + cancel_delayed_work_sync(&grace_period_end); + locks_end_grace(&lockd_manager); + nlmsvc_invalidate_all(); + set_grace_period(); + } +} + /* * This is the lockd kernel thread */ @@ -160,10 +170,7 @@ lockd(void *vrqstp) if (signalled()) { flush_signals(current); - if (nlmsvc_ops) { - nlmsvc_invalidate_all(); - set_grace_period(); - } + restart_grace(); continue; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 672368f..3b2f697 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else if (atomic_read(&new_dentry->d_count) > 1) /* dentry still busy? */ goto out; - } else - nfs_drop_nlink(new_inode); + } go_ahead: /* @@ -1638,10 +1637,8 @@ go_ahead: } nfs_inode_return_delegation(old_inode); - if (new_inode != NULL) { + if (new_inode != NULL) nfs_inode_return_delegation(new_inode); - d_delete(new_dentry); - } error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); @@ -1650,6 +1647,8 @@ out: if (rehash) d_rehash(rehash); if (!error) { + if (new_inode != NULL) + nfs_drop_nlink(new_inode); d_move(old_dentry, new_dentry); nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b..523e7e0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; @@ -479,11 +480,11 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock; ret = nfs_updatepage(filp, page, 0, pagelen); - if (ret == 0) - ret = pagelen; out_unlock: + if (!ret) + return VM_FAULT_LOCKED; unlock_page(page); - return ret; + return VM_FAULT_SIGBUS; } static struct vm_operations_struct nfs_file_vm_ops = { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9250067..4c5fb99 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1843,6 +1843,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); + if (!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } exp_get(exp); /* @@ -1905,6 +1914,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; + __be32 *cookiep; __be32 nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ @@ -1921,7 +1931,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; *p++ = xdr_one; /* mark entry present */ - cd->offset = p; /* remember pointer */ + cookiep = p; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ @@ -1935,6 +1945,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; case nfserr_dropit: goto fail; + case nfserr_noent: + goto skip_entry; default: /* * If the client requested the RDATTR_ERROR attribute, @@ -1953,6 +1965,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, } cd->buflen -= (p - cd->buffer); cd->buffer = p; + cd->offset = cookiep; +skip_entry: cd->common.err = nfs_ok; return 0; fail: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b95..c2a87c8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1912,6 +1912,22 @@ out_sems: return written ? written : ret; } +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, + struct file *out, + struct splice_desc *sd) +{ + int ret; + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + sd->total_len, 0, NULL); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return splice_from_pipe_feed(pipe, sd, pipe_to_file); +} + static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, unsigned int flags) { int ret; - struct inode *inode = out->f_path.dentry->d_inode; + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, - NULL); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else { + ret = ocfs2_splice_to_file(pipe, out, &sd); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); - if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); -out_unlock: - ocfs2_rw_unlock(inode, 1); -out: - mutex_unlock(&inode->i_mutex); + if (sd.num_spliced) + ret = sd.num_spliced; + + if (ret > 0) { + unsigned long nr_pages; + + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = ocfs2_rw_lock(inode, 1); + if (err < 0) { + mlog_errno(err); + } else { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } mlog_exit(ret); return ret; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index eea1d24..b606496 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -154,8 +154,9 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct buffer_head *di_bh = NULL; sigset_t blocked, oldset; @@ -196,7 +197,8 @@ out: ret2 = ocfs2_vm_op_unblock_sigs(&oldset); if (ret2 < 0) mlog_errno(ret2); - + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/splice.c b/fs/splice.c index 4c1029a..caa79d2 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -554,8 +554,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. */ -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -599,108 +599,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: return ret; } +EXPORT_SYMBOL(pipe_to_file); + +static void wakeup_pipe_writers(struct pipe_inode_info *pipe) +{ + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +} /** - * __splice_from_pipe - splice data from a pipe to given actor + * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: - * This function does little more than loop over the pipe and call - * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or - * pipe_to_user. + + * This function loops over the pipe and calls @actor to do the + * actual moving of a single struct pipe_buffer to the desired + * destination. It returns when there's no more buffers left in + * the pipe or if the requested number of bytes (@sd->total_len) + * have been copied. It returns a positive number (one) if the + * pipe needs to be filled with more data, zero if the required + * number of bytes have been copied and -errno on error. * + * This, together with splice_from_pipe_{begin,end,next}, may be + * used to implement the functionality of __splice_from_pipe() when + * locking is required around copying the pipe buffers to the + * destination. */ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { - int ret, do_wakeup, err; - - ret = 0; - do_wakeup = 0; - - for (;;) { - if (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; + int ret; - sd->len = buf->len; - if (sd->len > sd->total_len) - sd->len = sd->total_len; + while (pipe->nrbufs) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + const struct pipe_buf_operations *ops = buf->ops; - err = actor(pipe, buf, sd); - if (err <= 0) { - if (!ret && err != -ENODATA) - ret = err; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - break; - } + ret = actor(pipe, buf, sd); + if (ret <= 0) { + if (ret == -ENODATA) + ret = 0; + return ret; + } + buf->offset += ret; + buf->len -= ret; - ret += err; - buf->offset += err; - buf->len -= err; + sd->num_spliced += ret; + sd->len -= ret; + sd->pos += ret; + sd->total_len -= ret; - sd->len -= err; - sd->pos += err; - sd->total_len -= err; - if (sd->len) - continue; + if (!buf->len) { + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->nrbufs--; + if (pipe->inode) + sd->need_wakeup = true; + } - if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); - pipe->nrbufs--; - if (pipe->inode) - do_wakeup = 1; - } + if (!sd->total_len) + return 0; + } - if (!sd->total_len) - break; - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_feed); - if (pipe->nrbufs) - continue; +/** + * splice_from_pipe_next - wait for some data to splice from + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wait for some data and return a positive + * value (one) if pipe buffers are available. It will return zero + * or -errno if no more data needs to be spliced. + */ +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + while (!pipe->nrbufs) { if (!pipe->writers) - break; - if (!pipe->waiting_writers) { - if (ret) - break; - } + return 0; - if (sd->flags & SPLICE_F_NONBLOCK) { - if (!ret) - ret = -EAGAIN; - break; - } + if (!pipe->waiting_writers && sd->num_spliced) + return 0; - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + if (sd->flags & SPLICE_F_NONBLOCK) + return -EAGAIN; - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - do_wakeup = 0; + if (signal_pending(current)) + return -ERESTARTSYS; + + if (sd->need_wakeup) { + wakeup_pipe_writers(pipe); + sd->need_wakeup = false; } pipe_wait(pipe); } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_next); - return ret; +/** + * splice_from_pipe_begin - start splicing from pipe + * @pipe: pipe to splice from + * + * Description: + * This function should be called before a loop containing + * splice_from_pipe_next() and splice_from_pipe_feed() to + * initialize the necessary fields of @sd. + */ +void splice_from_pipe_begin(struct splice_desc *sd) +{ + sd->num_spliced = 0; + sd->need_wakeup = false; +} +EXPORT_SYMBOL(splice_from_pipe_begin); + +/** + * splice_from_pipe_end - finish splicing from pipe + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wake up pipe writers if necessary. It should + * be called after a loop containing splice_from_pipe_next() and + * splice_from_pipe_feed(). + */ +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + if (sd->need_wakeup) + wakeup_pipe_writers(pipe); +} +EXPORT_SYMBOL(splice_from_pipe_end); + +/** + * __splice_from_pipe - splice data from a pipe to given actor + * @pipe: pipe to splice from + * @sd: information to @actor + * @actor: handler that splices the data + * + * Description: + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. + * + */ +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) +{ + int ret; + + splice_from_pipe_begin(sd); + do { + ret = splice_from_pipe_next(pipe, sd); + if (ret > 0) + ret = splice_from_pipe_feed(pipe, sd, actor); + } while (ret > 0); + splice_from_pipe_end(pipe, sd); + + return sd->num_spliced ? sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); @@ -714,7 +784,7 @@ EXPORT_SYMBOL(__splice_from_pipe); * @actor: handler that splices the data * * Description: - * See __splice_from_pipe. This function locks the input and output inodes, + * See __splice_from_pipe. This function locks the pipe inode, * otherwise it's identical to __splice_from_pipe(). * */ @@ -723,7 +793,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, splice_actor *actor) { ssize_t ret; - struct inode *inode = out->f_mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -731,24 +800,11 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - /* - * The actor worker might be calling ->write_begin and - * ->write_end. Most of the time, these expect i_mutex to - * be held. Since this may result in an ABBA deadlock with - * pipe->inode, we have to order lock acquiry here. - * - * Outer lock must be inode->i_mutex, as pipe_wait() will - * release and reacquire pipe->inode->i_mutex, AND inode must - * never be a pipe. - */ - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); + mutex_lock(&pipe->inode->i_mutex); ret = __splice_from_pipe(pipe, &sd, actor); if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); - mutex_unlock(&inode->i_mutex); return ret; } @@ -839,17 +895,29 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - ret = file_remove_suid(out); - if (likely(!ret)) { - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - } - mutex_unlock(&inode->i_mutex); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); + + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = file_remove_suid(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); + + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + + if (sd.num_spliced) + ret = sd.num_spliced; + if (ret > 0) { unsigned long nr_pages; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 93b6de5..0ff89fe 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made * writable. UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); @@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); if (unlikely(c->ro_media)) - return -EROFS; + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (err == -ENOSPC) ubifs_warn("out of space for mmapped file " "(inode number %lu)", inode->i_ino); - return err; + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index e14c4e3..f4e2554 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -234,9 +234,9 @@ xfs_file_mmap( STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { - return block_page_mkwrite(vma, page, xfs_get_blocks); + return block_page_mkwrite(vma, vmf, xfs_get_blocks); } const struct file_operations xfs_file_operations = { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bd7ac79..2c2d216 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -223,7 +223,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d95da10..0011cd7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -75,7 +75,8 @@ struct ftrace_branch_data { * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code * to disable branch tracing on a per file basis. */ -#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +#if defined(CONFIG_TRACE_BRANCH_PROFILING) \ + && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__) void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3daa05f..93d0a69 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -197,7 +198,7 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware diff --git a/include/linux/splice.h b/include/linux/splice.h index 528dcb9..5f3faa9 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -36,6 +36,8 @@ struct splice_desc { void *data; /* cookie */ } u; loff_t pos; /* file position */ + size_t num_spliced; /* number of bytes already spliced */ + bool need_wakeup; /* need to wake up writer */ }; struct partial_page { @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, splice_actor *); extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, + splice_actor *); +extern int splice_from_pipe_next(struct pipe_inode_info *, + struct splice_desc *); +extern void splice_from_pipe_begin(struct splice_desc *); +extern void splice_from_pipe_end(struct pipe_inode_info *, + struct splice_desc *); +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); + extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index bedc7f6..abd4436 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -40,6 +40,7 @@ #include #include #include +#include #include /* known doi values */ @@ -215,6 +216,10 @@ int cipso_v4_sock_setattr(struct sock *sk, const struct netlbl_lsm_secattr *secattr); void cipso_v4_sock_delattr(struct sock *sk); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int cipso_v4_req_setattr(struct request_sock *req, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +void cipso_v4_req_delattr(struct request_sock *req); int cipso_v4_skbuff_setattr(struct sk_buff *skb, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); @@ -247,6 +252,18 @@ static inline int cipso_v4_sock_getattr(struct sock *sk, return -ENOSYS; } +static inline int cipso_v4_req_setattr(struct request_sock *req, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline void cipso_v4_req_delattr(struct request_sock *req) +{ + return; +} + static inline int cipso_v4_skbuff_setattr(struct sk_buff *skb, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 749011e..bf77b5c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -36,6 +36,7 @@ #include #include #include +#include #include struct cipso_v4_doi; @@ -413,6 +414,9 @@ int netlbl_sock_getattr(struct sock *sk, int netlbl_conn_setattr(struct sock *sk, struct sockaddr *addr, const struct netlbl_lsm_secattr *secattr); +int netlbl_req_setattr(struct request_sock *req, + const struct netlbl_lsm_secattr *secattr); +void netlbl_req_delattr(struct request_sock *req); int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); @@ -519,7 +523,7 @@ static inline int netlbl_enabled(void) return 0; } static inline int netlbl_sock_setattr(struct sock *sk, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } @@ -537,6 +541,15 @@ static inline int netlbl_conn_setattr(struct sock *sk, { return -ENOSYS; } +static inline int netlbl_req_setattr(struct request_sock *req, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} +static inline void netlbl_req_delattr(struct request_sock *req) +{ + return; +} static inline int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) diff --git a/mm/memory.c b/mm/memory.c index d7df5ba..c304626 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1940,6 +1940,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * get_user_pages(.write=1, .force=1). */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + struct vm_fault vmf; + int tmp; + + vmf.virtual_address = (void __user *)(address & + PAGE_MASK); + vmf.pgoff = old_page->index; + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.page = old_page; + /* * Notify the address space that the page is about to * become writable so that it can prohibit this or wait @@ -1951,8 +1960,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(old_page); pte_unmap_unlock(page_table, ptl); - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; goto unwritable_page; + } + if (unlikely(!(tmp & VM_FAULT_LOCKED))) { + lock_page(old_page); + if (!old_page->mapping) { + ret = 0; /* retry the fault */ + unlock_page(old_page); + goto unwritable_page; + } + } else + VM_BUG_ON(!PageLocked(old_page)); /* * Since we dropped the lock we need to revalidate @@ -1962,9 +1984,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - page_cache_release(old_page); - if (!pte_same(*page_table, orig_pte)) + if (!pte_same(*page_table, orig_pte)) { + unlock_page(old_page); + page_cache_release(old_page); goto unlock; + } page_mkwrite = 1; } @@ -2076,9 +2100,6 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - if (vma->vm_file) - file_update_time(vma->vm_file); - /* * Yes, Virginia, this is actually required to prevent a race * with clear_page_dirty_for_io() from clearing the page dirty @@ -2087,21 +2108,46 @@ unlock: * * do_no_page is protected similarly. */ - wait_on_page_locked(dirty_page); - set_page_dirty_balance(dirty_page, page_mkwrite); + if (!page_mkwrite) { + wait_on_page_locked(dirty_page); + set_page_dirty_balance(dirty_page, page_mkwrite); + } put_page(dirty_page); + if (page_mkwrite) { + struct address_space *mapping = dirty_page->mapping; + + set_page_dirty(dirty_page); + unlock_page(dirty_page); + page_cache_release(dirty_page); + if (mapping) { + /* + * Some device drivers do not set page.mapping + * but still dirty their pages + */ + balance_dirty_pages_ratelimited(mapping); + } + } + + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); } return ret; oom_free_new: page_cache_release(new_page); oom: - if (old_page) + if (old_page) { + if (page_mkwrite) { + unlock_page(old_page); + page_cache_release(old_page); + } page_cache_release(old_page); + } return VM_FAULT_OOM; unwritable_page: page_cache_release(old_page); - return VM_FAULT_SIGBUS; + return ret; } /* @@ -2645,25 +2691,25 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * to become writable */ if (vma->vm_ops->page_mkwrite) { + int tmp; + unlock_page(page); - if (vma->vm_ops->page_mkwrite(vma, page) < 0) { - ret = VM_FAULT_SIGBUS; - anon = 1; /* no anon but release vmf.page */ - goto out_unlocked; - } - lock_page(page); - /* - * XXX: this is not quite right (racy vs - * invalidate) to unlock and relock the page - * like this, however a better fix requires - * reworking page_mkwrite locking API, which - * is better done later. - */ - if (!page->mapping) { - ret = 0; - anon = 1; /* no anon but release vmf.page */ - goto out; + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; + goto unwritable_page; } + if (unlikely(!(tmp & VM_FAULT_LOCKED))) { + lock_page(page); + if (!page->mapping) { + ret = 0; /* retry the fault */ + unlock_page(page); + goto unwritable_page; + } + } else + VM_BUG_ON(!PageLocked(page)); page_mkwrite = 1; } } @@ -2715,19 +2761,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(page_table, ptl); out: - unlock_page(vmf.page); -out_unlocked: - if (anon) - page_cache_release(vmf.page); - else if (dirty_page) { - if (vma->vm_file) - file_update_time(vma->vm_file); + if (dirty_page) { + struct address_space *mapping = page->mapping; - set_page_dirty_balance(dirty_page, page_mkwrite); + if (set_page_dirty(dirty_page)) + page_mkwrite = 1; + unlock_page(dirty_page); put_page(dirty_page); + if (page_mkwrite && mapping) { + /* + * Some device drivers do not set page.mapping but still + * dirty their pages + */ + balance_dirty_pages_ratelimited(mapping); + } + + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); + } else { + unlock_page(vmf.page); + if (anon) + page_cache_release(vmf.page); } return ret; + +unwritable_page: + page_cache_release(page); + return ret; } static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 7bc9929..4ea2c38 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1942,6 +1942,72 @@ socket_setattr_failure: } /** + * cipso_v4_req_setattr - Add a CIPSO option to a connection request socket + * @req: the connection request socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. Returns zero on success and + * negative values on failure. + * + */ +int cipso_v4_req_setattr(struct request_sock *req, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_request_sock *req_inet; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto req_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto req_setattr_failure; + buf_len = ret_val; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we won't always have CAP_NET_RAW even though we _always_ want to + * set the IPOPT_CIPSO option. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto req_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + kfree(buf); + buf = NULL; + + req_inet = inet_rsk(req); + opt = xchg(&req_inet->opt, opt); + kfree(opt); + + return 0; + +req_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** * cipso_v4_sock_delattr - Delete the CIPSO option from a socket * @sk: the socket * @@ -2016,6 +2082,70 @@ void cipso_v4_sock_delattr(struct sock *sk) } /** + * cipso_v4_req_delattr - Delete the CIPSO option from a request socket + * @reg: the request socket + * + * Description: + * Removes the CIPSO option from a request socket, if present. + * + */ +void cipso_v4_req_delattr(struct request_sock *req) +{ + struct ip_options *opt; + struct inet_request_sock *req_inet; + + req_inet = inet_rsk(req); + opt = req_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + opt->optlen = (optlen_new + 3) & ~3; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + req_inet->opt = NULL; + kfree(opt); + } +} + +/** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option * @secattr: the security attributes diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d346c22..b35a950 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -288,10 +288,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; @@ -322,6 +318,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } + req->expires = 0UL; req->retrans = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cf74c41..5499c28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1239,14 +1239,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index fd9229d..a52ca1c 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -757,6 +757,90 @@ conn_setattr_return: } /** + * netlbl_req_setattr - Label a request socket using the correct protocol + * @req: the request socket to label + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given socket using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_req_setattr(struct request_sock *req, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct netlbl_dom_map *dom_entry; + struct netlbl_domaddr4_map *af4_entry; + u32 proto_type; + struct cipso_v4_doi *proto_cv4; + + rcu_read_lock(); + dom_entry = netlbl_domhsh_getentry(secattr->domain); + if (dom_entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; + } + switch (req->rsk_ops->family) { + case AF_INET: + if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { + struct inet_request_sock *req_inet = inet_rsk(req); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + req_inet->rmt_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; + } + proto_type = af4_entry->type; + proto_cv4 = af4_entry->type_def.cipsov4; + } else { + proto_type = dom_entry->type; + proto_cv4 = dom_entry->type_def.cipsov4; + } + switch (proto_type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_req_delattr(req); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = -EPROTONOSUPPORT; + } + +req_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** +* netlbl_req_delattr - Delete all the NetLabel labels on a socket +* @req: the socket +* +* Description: +* Remove all the NetLabel labeling from @req. +* +*/ +void netlbl_req_delattr(struct request_sock *req) +{ + cipso_v4_req_delattr(req); +} + +/** * netlbl_skbuff_setattr - Label a packet using the correct protocol * @skb: the packet * @family: protocol family diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e210b21..8d24c91 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -311,7 +311,7 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; - selinux_netlbl_sk_security_reset(ssec, family); + selinux_netlbl_sk_security_reset(ssec); return 0; } @@ -2952,7 +2952,6 @@ static void selinux_inode_getsecid(const struct inode *inode, u32 *secid) static int selinux_revalidate_file_permission(struct file *file, int mask) { const struct cred *cred = current_cred(); - int rc; struct inode *inode = file->f_path.dentry->d_inode; if (!mask) { @@ -2964,30 +2963,16 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) mask |= MAY_APPEND; - rc = file_has_perm(cred, file, - file_mask_to_av(inode->i_mode, mask)); - if (rc) - return rc; - - return selinux_netlbl_inode_permission(inode, mask); + return file_has_perm(cred, file, file_mask_to_av(inode->i_mode, mask)); } static int selinux_file_permission(struct file *file, int mask) { - struct inode *inode = file->f_path.dentry->d_inode; - struct file_security_struct *fsec = file->f_security; - struct inode_security_struct *isec = inode->i_security; - u32 sid = current_sid(); - if (!mask) { /* No permission to check. Existence test. */ return 0; } - if (sid == fsec->sid && fsec->isid == isec->sid - && fsec->pseqno == avc_policy_seqno()) - return selinux_netlbl_inode_permission(inode, mask); - return selinux_revalidate_file_permission(file, mask); } @@ -3799,7 +3784,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, sksec = sock->sk->sk_security; sksec->sid = isec->sid; sksec->sclass = isec->sclass; - err = selinux_netlbl_socket_post_create(sock); + err = selinux_netlbl_socket_post_create(sock->sk, family); } return err; @@ -3990,13 +3975,7 @@ static int selinux_socket_accept(struct socket *sock, struct socket *newsock) static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - int rc; - - rc = socket_has_perm(current, sock, SOCKET__WRITE); - if (rc) - return rc; - - return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE); + return socket_has_perm(current, sock, SOCKET__WRITE); } static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, @@ -4384,7 +4363,7 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->peer_sid = ssec->peer_sid; newssec->sclass = ssec->sclass; - selinux_netlbl_sk_security_reset(newssec, newsk->sk_family); + selinux_netlbl_sk_security_reset(newssec); } static void selinux_sk_getsecid(struct sock *sk, u32 *secid) @@ -4429,15 +4408,15 @@ static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, req->secid = sksec->sid; req->peer_secid = SECSID_NULL; return 0; + } else { + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); + if (err) + return err; + req->secid = newsid; + req->peer_secid = peersid; } - err = security_sid_mls_copy(sksec->sid, peersid, &newsid); - if (err) - return err; - - req->secid = newsid; - req->peer_secid = peersid; - return 0; + return selinux_netlbl_inet_conn_request(req, family); } static void selinux_inet_csk_clone(struct sock *newsk, @@ -4454,7 +4433,7 @@ static void selinux_inet_csk_clone(struct sock *newsk, /* We don't need to take any sort of lock here as we are the only * thread with access to newsksec */ - selinux_netlbl_sk_security_reset(newsksec, req->rsk_ops->family); + selinux_netlbl_inet_csk_clone(newsk, req->rsk_ops->family); } static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) @@ -4467,8 +4446,6 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) family = PF_INET; selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); - - selinux_netlbl_inet_conn_established(sk, family); } static void selinux_req_classify_flow(const struct request_sock *req, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index b913c8d..a5537cd 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -32,6 +32,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -42,8 +43,7 @@ void selinux_netlbl_cache_invalidate(void); void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway); void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec); -void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec, - int family); +void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec); int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, u16 family, @@ -53,8 +53,9 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u16 family, u32 sid); -void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family); -int selinux_netlbl_socket_post_create(struct socket *sock); +int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family); +void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family); +int selinux_netlbl_socket_post_create(struct sock *sk, u16 family); int selinux_netlbl_inode_permission(struct inode *inode, int mask); int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct sk_buff *skb, @@ -85,8 +86,7 @@ static inline void selinux_netlbl_sk_security_free( } static inline void selinux_netlbl_sk_security_reset( - struct sk_security_struct *ssec, - int family) + struct sk_security_struct *ssec) { return; } @@ -113,12 +113,17 @@ static inline int selinux_netlbl_conn_setsid(struct sock *sk, return 0; } -static inline void selinux_netlbl_inet_conn_established(struct sock *sk, - u16 family) +static inline int selinux_netlbl_inet_conn_request(struct request_sock *req, + u16 family) +{ + return 0; +} +static inline void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family) { return; } -static inline int selinux_netlbl_socket_post_create(struct socket *sock) +static inline int selinux_netlbl_socket_post_create(struct sock *sk, + u16 family) { return 0; } diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 350794a..5786c8c 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -100,41 +100,6 @@ static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk) } /** - * selinux_netlbl_sock_setsid - Label a socket using the NetLabel mechanism - * @sk: the socket to label - * - * Description: - * Attempt to label a socket using the NetLabel mechanism. Returns zero values - * on success, negative values on failure. - * - */ -static int selinux_netlbl_sock_setsid(struct sock *sk) -{ - int rc; - struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr *secattr; - - if (sksec->nlbl_state != NLBL_REQUIRE) - return 0; - - secattr = selinux_netlbl_sock_genattr(sk); - if (secattr == NULL) - return -ENOMEM; - rc = netlbl_sock_setattr(sk, secattr); - switch (rc) { - case 0: - sksec->nlbl_state = NLBL_LABELED; - break; - case -EDESTADDRREQ: - sksec->nlbl_state = NLBL_REQSKB; - rc = 0; - break; - } - - return rc; -} - -/** * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache * * Description: @@ -188,13 +153,9 @@ void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec) * The caller is responsibile for all the NetLabel sk_security_struct locking. * */ -void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec, - int family) +void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec) { - if (family == PF_INET) - ssec->nlbl_state = NLBL_REQUIRE; - else - ssec->nlbl_state = NLBL_UNSET; + ssec->nlbl_state = NLBL_UNSET; } /** @@ -281,127 +242,85 @@ skbuff_setsid_return: } /** - * selinux_netlbl_inet_conn_established - Netlabel the newly accepted connection - * @sk: the new connection + * selinux_netlbl_inet_conn_request - Label an incoming stream connection + * @req: incoming connection request socket * * Description: - * A new connection has been established on @sk so make sure it is labeled - * correctly with the NetLabel susbsystem. + * A new incoming connection request is represented by @req, we need to label + * the new request_sock here and the stack will ensure the on-the-wire label + * will get preserved when a full sock is created once the connection handshake + * is complete. Returns zero on success, negative values on failure. * */ -void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) +int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family) { int rc; - struct sk_security_struct *sksec = sk->sk_security; - struct netlbl_lsm_secattr *secattr; - struct inet_sock *sk_inet = inet_sk(sk); - struct sockaddr_in addr; - - if (sksec->nlbl_state != NLBL_REQUIRE) - return; + struct netlbl_lsm_secattr secattr; - secattr = selinux_netlbl_sock_genattr(sk); - if (secattr == NULL) - return; + if (family != PF_INET) + return 0; - rc = netlbl_sock_setattr(sk, secattr); - switch (rc) { - case 0: - sksec->nlbl_state = NLBL_LABELED; - break; - case -EDESTADDRREQ: - /* no PF_INET6 support yet because we don't support any IPv6 - * labeling protocols */ - if (family != PF_INET) { - sksec->nlbl_state = NLBL_UNSET; - return; - } - - addr.sin_family = family; - addr.sin_addr.s_addr = sk_inet->daddr; - if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr, - secattr) != 0) { - /* we failed to label the connected socket (could be - * for a variety of reasons, the actual "why" isn't - * important here) so we have to go to our backup plan, - * labeling the packets individually in the netfilter - * local output hook. this is okay but we need to - * adjust the MSS of the connection to take into - * account any labeling overhead, since we don't know - * the exact overhead at this point we'll use the worst - * case value which is 40 bytes for IPv4 */ - struct inet_connection_sock *sk_conn = inet_csk(sk); - sk_conn->icsk_ext_hdr_len += 40 - - (sk_inet->opt ? sk_inet->opt->optlen : 0); - sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); - - sksec->nlbl_state = NLBL_REQSKB; - } else - sksec->nlbl_state = NLBL_CONNLABELED; - break; - default: - /* note that we are failing to label the socket which could be - * a bad thing since it means traffic could leave the system - * without the desired labeling, however, all is not lost as - * we have a check in selinux_netlbl_inode_permission() to - * pick up the pieces that we might drop here because we can't - * return an error code */ - break; - } + netlbl_secattr_init(&secattr); + rc = security_netlbl_sid_to_secattr(req->secid, &secattr); + if (rc != 0) + goto inet_conn_request_return; + rc = netlbl_req_setattr(req, &secattr); +inet_conn_request_return: + netlbl_secattr_destroy(&secattr); + return rc; } /** - * selinux_netlbl_socket_post_create - Label a socket using NetLabel - * @sock: the socket to label + * selinux_netlbl_inet_csk_clone - Initialize the newly created sock + * @sk: the new sock * * Description: - * Attempt to label a socket using the NetLabel mechanism using the given - * SID. Returns zero values on success, negative values on failure. + * A new connection has been established using @sk, we've already labeled the + * socket via the request_sock struct in selinux_netlbl_inet_conn_request() but + * we need to set the NetLabel state here since we now have a sock structure. * */ -int selinux_netlbl_socket_post_create(struct socket *sock) +void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family) { - return selinux_netlbl_sock_setsid(sock->sk); + struct sk_security_struct *sksec = sk->sk_security; + + if (family == PF_INET) + sksec->nlbl_state = NLBL_LABELED; + else + sksec->nlbl_state = NLBL_UNSET; } /** - * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled - * @inode: the file descriptor's inode - * @mask: the permission mask + * selinux_netlbl_socket_post_create - Label a socket using NetLabel + * @sock: the socket to label * * Description: - * Looks at a file's inode and if it is marked as a socket protected by - * NetLabel then verify that the socket has been labeled, if not try to label - * the socket now with the inode's SID. Returns zero on success, negative - * values on failure. + * Attempt to label a socket using the NetLabel mechanism using the given + * SID. Returns zero values on success, negative values on failure. * */ -int selinux_netlbl_inode_permission(struct inode *inode, int mask) +int selinux_netlbl_socket_post_create(struct sock *sk, u16 family) { int rc; - struct sock *sk; - struct socket *sock; - struct sk_security_struct *sksec; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr *secattr; - if (!S_ISSOCK(inode->i_mode) || - ((mask & (MAY_WRITE | MAY_APPEND)) == 0)) - return 0; - sock = SOCKET_I(inode); - sk = sock->sk; - if (sk == NULL) - return 0; - sksec = sk->sk_security; - if (sksec == NULL || sksec->nlbl_state != NLBL_REQUIRE) + if (family != PF_INET) return 0; - local_bh_disable(); - bh_lock_sock_nested(sk); - if (likely(sksec->nlbl_state == NLBL_REQUIRE)) - rc = selinux_netlbl_sock_setsid(sk); - else + secattr = selinux_netlbl_sock_genattr(sk); + if (secattr == NULL) + return -ENOMEM; + rc = netlbl_sock_setattr(sk, secattr); + switch (rc) { + case 0: + sksec->nlbl_state = NLBL_LABELED; + break; + case -EDESTADDRREQ: + sksec->nlbl_state = NLBL_REQSKB; rc = 0; - bh_unlock_sock(sk); - local_bh_enable(); + break; + } return rc; } diff --git a/security/smack/smack.h b/security/smack/smack.h index b79582e..1983196 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -40,7 +40,6 @@ struct superblock_smack { struct socket_smack { char *smk_out; /* outbound label */ char *smk_in; /* inbound label */ - int smk_labeled; /* label scheme */ char smk_packet[SMK_LABELLEN]; /* TCP peer label */ }; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c1c5f36..b4e811b 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -7,6 +7,8 @@ * Casey Schaufler * * Copyright (C) 2007 Casey Schaufler + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * Paul Moore * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -20,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1279,7 +1282,6 @@ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) ssp->smk_in = csp; ssp->smk_out = csp; - ssp->smk_labeled = SMACK_CIPSO_SOCKET; ssp->smk_packet[0] = '\0'; sk->sk_security = ssp; @@ -1397,16 +1399,6 @@ static int smack_netlabel(struct sock *sk, int labeled) bh_unlock_sock(sk); local_bh_enable(); - /* - * Remember the label scheme used so that it is not - * necessary to do the netlabel setting if it has not - * changed the next time through. - * - * The -EDESTADDRREQ case is an indication that there's - * a single level host involved. - */ - if (rc == 0) - ssp->smk_labeled = labeled; return rc; } @@ -1551,19 +1543,14 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, return -EINVAL; hostsp = smack_host_label((struct sockaddr_in *)sap); - if (hostsp == NULL) { - if (ssp->smk_labeled != SMACK_CIPSO_SOCKET) - return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); - return 0; - } + if (hostsp == NULL) + return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE); if (rc != 0) return rc; - if (ssp->smk_labeled != SMACK_UNLABELED_SOCKET) - return smack_netlabel(sock->sk, SMACK_UNLABELED_SOCKET); - return 0; + return smack_netlabel(sock->sk, SMACK_UNLABELED_SOCKET); } /** @@ -2275,21 +2262,14 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, return 0; hostsp = smack_host_label(sip); - if (hostsp == NULL) { - if (ssp->smk_labeled != SMACK_CIPSO_SOCKET) - return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); - return 0; - } + if (hostsp == NULL) + return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE); if (rc != 0) return rc; - if (ssp->smk_labeled != SMACK_UNLABELED_SOCKET) - return smack_netlabel(sock->sk, SMACK_UNLABELED_SOCKET); - - return 0; - + return smack_netlabel(sock->sk, SMACK_UNLABELED_SOCKET); } @@ -2504,22 +2484,14 @@ static int smack_socket_getpeersec_dgram(struct socket *sock, static void smack_sock_graft(struct sock *sk, struct socket *parent) { struct socket_smack *ssp; - int rc; - if (sk == NULL) - return; - - if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + if (sk == NULL || + (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)) return; ssp = sk->sk_security; ssp->smk_in = ssp->smk_out = current_security(); - ssp->smk_packet[0] = '\0'; - - rc = smack_netlabel(sk, SMACK_CIPSO_SOCKET); - if (rc != 0) - printk(KERN_WARNING "Smack: \"%s\" netlbl error %d.\n", - __func__, -rc); + /* cssp->smk_packet is already set in smack_inet_csk_clone() */ } /** @@ -2534,35 +2506,82 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - struct netlbl_lsm_secattr skb_secattr; + u16 family = sk->sk_family; struct socket_smack *ssp = sk->sk_security; + struct netlbl_lsm_secattr secattr; + struct sockaddr_in addr; + struct iphdr *hdr; char smack[SMK_LABELLEN]; int rc; - if (skb == NULL) - return -EACCES; + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; - netlbl_secattr_init(&skb_secattr); - rc = netlbl_skbuff_getattr(skb, sk->sk_family, &skb_secattr); + netlbl_secattr_init(&secattr); + rc = netlbl_skbuff_getattr(skb, family, &secattr); if (rc == 0) - smack_from_secattr(&skb_secattr, smack); + smack_from_secattr(&secattr, smack); else strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN); - netlbl_secattr_destroy(&skb_secattr); + netlbl_secattr_destroy(&secattr); + /* - * Receiving a packet requires that the other end - * be able to write here. Read access is not required. - * - * If the request is successful save the peer's label - * so that SO_PEERCRED can report it. - */ + * Receiving a packet requires that the other end be able to write + * here. Read access is not required. + */ rc = smk_access(smack, ssp->smk_in, MAY_WRITE); - if (rc == 0) - strncpy(ssp->smk_packet, smack, SMK_MAXLEN); + if (rc != 0) + return rc; + + /* + * Save the peer's label in the request_sock so we can later setup + * smk_packet in the child socket so that SO_PEERCRED can report it. + */ + req->peer_secid = smack_to_secid(smack); + + /* + * We need to decide if we want to label the incoming connection here + * if we do we only need to label the request_sock and the stack will + * propogate the wire-label to the sock when it is created. + */ + hdr = ip_hdr(skb); + addr.sin_addr.s_addr = hdr->saddr; + rcu_read_lock(); + if (smack_host_label(&addr) == NULL) { + rcu_read_unlock(); + netlbl_secattr_init(&secattr); + smack_to_secattr(smack, &secattr); + rc = netlbl_req_setattr(req, &secattr); + netlbl_secattr_destroy(&secattr); + } else { + rcu_read_unlock(); + netlbl_req_delattr(req); + } return rc; } +/** +* smack_inet_csk_clone - Copy the connection information to the new socket +* @sk: the new socket +* @req: the connection's request_sock +* +* Transfer the connection's peer label to the newly created socket. +*/ +static void smack_inet_csk_clone(struct sock *sk, + const struct request_sock *req) +{ + struct socket_smack *ssp = sk->sk_security; + char *smack; + + if (req->peer_secid != 0) { + smack = smack_from_secid(req->peer_secid); + strncpy(ssp->smk_packet, smack, SMK_MAXLEN); + } else + ssp->smk_packet[0] = '\0'; +} + /* * Key management security hooks * @@ -2915,6 +2934,7 @@ struct security_operations smack_ops = { .sk_free_security = smack_sk_free_security, .sock_graft = smack_sock_graft, .inet_conn_request = smack_inet_conn_request, + .inet_csk_clone = smack_inet_csk_clone, /* key management security hooks */ #ifdef CONFIG_KEYS diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 6094344..0547239 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4007,7 +4007,12 @@ static int stac92xx_init(struct hda_codec *codec) pinctl = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_WIDGET_CONTROL, 0); /* if PINCTL already set then skip */ - if (!(pinctl & AC_PINCTL_IN_EN)) { + /* Also, if both INPUT and OUTPUT are set, + * it must be a BIOS bug; need to override, too + */ + if (!(pinctl & AC_PINCTL_IN_EN) || + (pinctl & AC_PINCTL_OUT_EN)) { + pinctl &= ~AC_PINCTL_OUT_EN; pinctl |= AC_PINCTL_IN_EN; stac92xx_auto_set_pinctl(codec, nid, pinctl); diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index a5731fa..380302d 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -744,7 +744,7 @@ SND_SOC_DAPM_MIXER_E("INMIXL", WM8990_INTDRIVBITS, WM8990_INMIXL_PWR_BIT, 0, inmixer_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), /* AINLMUX */ -SND_SOC_DAPM_MUX_E("AILNMUX", WM8990_INTDRIVBITS, WM8990_AINLMUX_PWR_BIT, 0, +SND_SOC_DAPM_MUX_E("AINLMUX", WM8990_INTDRIVBITS, WM8990_AINLMUX_PWR_BIT, 0, &wm8990_dapm_ainlmux_controls, inmixer_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), @@ -755,7 +755,7 @@ SND_SOC_DAPM_MIXER_E("INMIXR", WM8990_INTDRIVBITS, WM8990_INMIXR_PWR_BIT, 0, inmixer_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), /* AINRMUX */ -SND_SOC_DAPM_MUX_E("AIRNMUX", WM8990_INTDRIVBITS, WM8990_AINRMUX_PWR_BIT, 0, +SND_SOC_DAPM_MUX_E("AINRMUX", WM8990_INTDRIVBITS, WM8990_AINRMUX_PWR_BIT, 0, &wm8990_dapm_ainrmux_controls, inmixer_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), @@ -863,40 +863,40 @@ static const struct snd_soc_dapm_route audio_map[] = { {"LIN12 PGA", "LIN2 Switch", "LIN2"}, /* LIN34 PGA */ {"LIN34 PGA", "LIN3 Switch", "LIN3"}, - {"LIN34 PGA", "LIN4 Switch", "LIN4"}, + {"LIN34 PGA", "LIN4 Switch", "LIN4/RXN"}, /* INMIXL */ {"INMIXL", "Record Left Volume", "LOMIX"}, {"INMIXL", "LIN2 Volume", "LIN2"}, {"INMIXL", "LINPGA12 Switch", "LIN12 PGA"}, {"INMIXL", "LINPGA34 Switch", "LIN34 PGA"}, - /* AILNMUX */ - {"AILNMUX", "INMIXL Mix", "INMIXL"}, - {"AILNMUX", "DIFFINL Mix", "LIN12PGA"}, - {"AILNMUX", "DIFFINL Mix", "LIN34PGA"}, - {"AILNMUX", "RXVOICE Mix", "LIN4/RXN"}, - {"AILNMUX", "RXVOICE Mix", "RIN4/RXP"}, + /* AINLMUX */ + {"AINLMUX", "INMIXL Mix", "INMIXL"}, + {"AINLMUX", "DIFFINL Mix", "LIN12 PGA"}, + {"AINLMUX", "DIFFINL Mix", "LIN34 PGA"}, + {"AINLMUX", "RXVOICE Mix", "LIN4/RXN"}, + {"AINLMUX", "RXVOICE Mix", "RIN4/RXP"}, /* ADC */ - {"Left ADC", NULL, "AILNMUX"}, + {"Left ADC", NULL, "AINLMUX"}, /* RIN12 PGA */ {"RIN12 PGA", "RIN1 Switch", "RIN1"}, {"RIN12 PGA", "RIN2 Switch", "RIN2"}, /* RIN34 PGA */ {"RIN34 PGA", "RIN3 Switch", "RIN3"}, - {"RIN34 PGA", "RIN4 Switch", "RIN4"}, + {"RIN34 PGA", "RIN4 Switch", "RIN4/RXP"}, /* INMIXL */ {"INMIXR", "Record Right Volume", "ROMIX"}, {"INMIXR", "RIN2 Volume", "RIN2"}, {"INMIXR", "RINPGA12 Switch", "RIN12 PGA"}, {"INMIXR", "RINPGA34 Switch", "RIN34 PGA"}, - /* AIRNMUX */ - {"AIRNMUX", "INMIXR Mix", "INMIXR"}, - {"AIRNMUX", "DIFFINR Mix", "RIN12PGA"}, - {"AIRNMUX", "DIFFINR Mix", "RIN34PGA"}, - {"AIRNMUX", "RXVOICE Mix", "RIN4/RXN"}, - {"AIRNMUX", "RXVOICE Mix", "RIN4/RXP"}, + /* AINRMUX */ + {"AINRMUX", "INMIXR Mix", "INMIXR"}, + {"AINRMUX", "DIFFINR Mix", "RIN12 PGA"}, + {"AINRMUX", "DIFFINR Mix", "RIN34 PGA"}, + {"AINRMUX", "RXVOICE Mix", "LIN4/RXN"}, + {"AINRMUX", "RXVOICE Mix", "RIN4/RXP"}, /* ADC */ - {"Right ADC", NULL, "AIRNMUX"}, + {"Right ADC", NULL, "AINRMUX"}, /* LOMIX */ {"LOMIX", "LOMIX RIN3 Bypass Switch", "RIN3"}, @@ -937,7 +937,7 @@ static const struct snd_soc_dapm_route audio_map[] = { {"LOPMIX", "LOPMIX Left Mixer PGA Switch", "LOPGA"}, /* OUT3MIX */ - {"OUT3MIX", "OUT3MIX LIN4/RXP Bypass Switch", "LIN4/RXP"}, + {"OUT3MIX", "OUT3MIX LIN4/RXP Bypass Switch", "LIN4/RXN"}, {"OUT3MIX", "OUT3MIX Left Out PGA Switch", "LOPGA"}, /* OUT4MIX */ @@ -964,7 +964,7 @@ static const struct snd_soc_dapm_route audio_map[] = { /* Output Pins */ {"LON", NULL, "LONMIX"}, {"LOP", NULL, "LOPMIX"}, - {"OUT", NULL, "OUT3MIX"}, + {"OUT3", NULL, "OUT3MIX"}, {"LOUT", NULL, "LOUT PGA"}, {"SPKN", NULL, "SPKMIX"}, {"ROUT", NULL, "ROUT PGA"},