Magellan Linux

Contents of /trunk/kernel26-alx/patches-2.6.27-r3/0123-2.6.27.24-all-fixes.patch

Parent Directory | Revision Log


Revision 1176 - (show annotations) (download)
Thu Oct 14 15:11:06 2010 UTC (13 years, 6 months ago) by niro
File size: 49489 byte(s)
-2.6.27-alx-r3: new magellan 0.5.2 kernel
1 diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
2 index 8362860..0a7c8a9 100644
3 --- a/Documentation/filesystems/Locking
4 +++ b/Documentation/filesystems/Locking
5 @@ -502,23 +502,31 @@ prototypes:
6 void (*open)(struct vm_area_struct*);
7 void (*close)(struct vm_area_struct*);
8 int (*fault)(struct vm_area_struct*, struct vm_fault *);
9 - int (*page_mkwrite)(struct vm_area_struct *, struct page *);
10 + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
11 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
12
13 locking rules:
14 BKL mmap_sem PageLocked(page)
15 open: no yes
16 close: no yes
17 -fault: no yes
18 -page_mkwrite: no yes no
19 +fault: no yes can return with page locked
20 +page_mkwrite: no yes can return with page locked
21 access: no yes
22
23 - ->page_mkwrite() is called when a previously read-only page is
24 -about to become writeable. The file system is responsible for
25 -protecting against truncate races. Once appropriate action has been
26 -taking to lock out truncate, the page range should be verified to be
27 -within i_size. The page mapping should also be checked that it is not
28 -NULL.
29 + ->fault() is called when a previously not present pte is about
30 +to be faulted in. The filesystem must find and return the page associated
31 +with the passed in "pgoff" in the vm_fault structure. If it is possible that
32 +the page may be truncated and/or invalidated, then the filesystem must lock
33 +the page, then ensure it is not already truncated (the page lock will block
34 +subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
35 +locked. The VM will unlock the page.
36 +
37 + ->page_mkwrite() is called when a previously read-only pte is
38 +about to become writeable. The filesystem again must ensure that there are
39 +no truncate/invalidate races, and then return with the page locked. If
40 +the page has been truncated, the filesystem should not look up a new page
41 +like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
42 +will cause the VM to retry the fault.
43
44 ->access() is called when get_user_pages() fails in
45 acces_process_vm(), typically used to debug a process through
46 diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
47 index eb8f72c..0e034a4 100644
48 --- a/drivers/i2c/algos/i2c-algo-bit.c
49 +++ b/drivers/i2c/algos/i2c-algo-bit.c
50 @@ -104,7 +104,7 @@ static int sclhi(struct i2c_algo_bit_data *adap)
51 * chips may hold it low ("clock stretching") while they
52 * are processing data internally.
53 */
54 - if (time_after_eq(jiffies, start + adap->timeout))
55 + if (time_after(jiffies, start + adap->timeout))
56 return -ETIMEDOUT;
57 cond_resched();
58 }
59 diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
60 index d50b329..2346a89 100644
61 --- a/drivers/i2c/algos/i2c-algo-pca.c
62 +++ b/drivers/i2c/algos/i2c-algo-pca.c
63 @@ -270,10 +270,21 @@ static int pca_xfer(struct i2c_adapter *i2c_adap,
64
65 case 0x30: /* Data byte in I2CDAT has been transmitted; NOT ACK has been received */
66 DEB2("NOT ACK received after data byte\n");
67 + pca_stop(adap);
68 goto out;
69
70 case 0x38: /* Arbitration lost during SLA+W, SLA+R or data bytes */
71 DEB2("Arbitration lost\n");
72 + /*
73 + * The PCA9564 data sheet (2006-09-01) says "A
74 + * START condition will be transmitted when the
75 + * bus becomes free (STOP or SCL and SDA high)"
76 + * when the STA bit is set (p. 11).
77 + *
78 + * In case this won't work, try pca_reset()
79 + * instead.
80 + */
81 + pca_start(adap);
82 goto out;
83
84 case 0x58: /* Data byte has been received; NOT ACK has been returned */
85 diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
86 index 666b7ba..8c50857 100644
87 --- a/drivers/md/bitmap.c
88 +++ b/drivers/md/bitmap.c
89 @@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
90 oldindex = index;
91 oldpage = page;
92
93 + bitmap->filemap[bitmap->file_pages++] = page;
94 + bitmap->last_page_size = count;
95 +
96 if (outofdate) {
97 /*
98 * if bitmap is out of date, dirty the
99 @@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
100 write_page(bitmap, page, 1);
101
102 ret = -EIO;
103 - if (bitmap->flags & BITMAP_WRITE_ERROR) {
104 - /* release, page not in filemap yet */
105 - put_page(page);
106 + if (bitmap->flags & BITMAP_WRITE_ERROR)
107 goto err;
108 - }
109 }
110 -
111 - bitmap->filemap[bitmap->file_pages++] = page;
112 - bitmap->last_page_size = count;
113 }
114 paddr = kmap_atomic(page, KM_USER0);
115 if (bitmap->flags & BITMAP_HOSTENDIAN)
116 @@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
117 kunmap_atomic(paddr, KM_USER0);
118 if (b) {
119 /* if the disk bit is set, set the memory bit */
120 - bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
121 - ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
122 - );
123 + int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
124 + >= start);
125 + bitmap_set_memory_bits(bitmap,
126 + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
127 + needed);
128 bit_cnt++;
129 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
130 }
131 @@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
132 spin_lock_irqsave(&bitmap->lock, flags);
133 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
134 }
135 - bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
136 - &blocks, 0);
137 + bmc = bitmap_get_counter(bitmap,
138 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
139 + &blocks, 0);
140 if (bmc) {
141 /*
142 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
143 @@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
144 } else if (*bmc == 1) {
145 /* we can clear the bit */
146 *bmc = 0;
147 - bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
148 + bitmap_count_page(bitmap,
149 + (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
150 -1);
151
152 /* clear the bit */
153 @@ -1485,7 +1486,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
154 unsigned long chunk;
155
156 for (chunk = s; chunk <= e; chunk++) {
157 - sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
158 + sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
159 bitmap_set_memory_bits(bitmap, sec, 1);
160 bitmap_file_set_bit(bitmap, sec);
161 }
162 diff --git a/drivers/md/md.c b/drivers/md/md.c
163 index 60f3e59..ebbc3bb 100644
164 --- a/drivers/md/md.c
165 +++ b/drivers/md/md.c
166 @@ -2772,11 +2772,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
167 } else
168 err = -EBUSY;
169 spin_unlock_irq(&mddev->write_lock);
170 - } else {
171 - mddev->ro = 0;
172 - mddev->recovery_cp = MaxSector;
173 - err = do_md_run(mddev);
174 - }
175 + } else
176 + err = -EINVAL;
177 break;
178 case active:
179 if (mddev->pers) {
180 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
181 index dc50f98..b08dd95 100644
182 --- a/drivers/md/raid10.c
183 +++ b/drivers/md/raid10.c
184 @@ -1805,17 +1805,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
185 r10_bio->sector = sect;
186
187 raid10_find_phys(conf, r10_bio);
188 - /* Need to check if this section will still be
189 +
190 + /* Need to check if the array will still be
191 * degraded
192 */
193 - for (j=0; j<conf->copies;j++) {
194 - int d = r10_bio->devs[j].devnum;
195 - if (conf->mirrors[d].rdev == NULL ||
196 - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
197 + for (j=0; j<conf->raid_disks; j++)
198 + if (conf->mirrors[j].rdev == NULL ||
199 + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
200 still_degraded = 1;
201 break;
202 }
203 - }
204 +
205 must_sync = bitmap_start_sync(mddev->bitmap, sect,
206 &sync_blocks, still_degraded);
207
208 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
209 index b70c531..a6e730f 100644
210 --- a/drivers/net/ehea/ehea_main.c
211 +++ b/drivers/net/ehea/ehea_main.c
212 @@ -529,14 +529,17 @@ static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array,
213 x &= (arr_len - 1);
214
215 pref = skb_array[x];
216 - prefetchw(pref);
217 - prefetchw(pref + EHEA_CACHE_LINE);
218 -
219 - pref = (skb_array[x]->data);
220 - prefetch(pref);
221 - prefetch(pref + EHEA_CACHE_LINE);
222 - prefetch(pref + EHEA_CACHE_LINE * 2);
223 - prefetch(pref + EHEA_CACHE_LINE * 3);
224 + if (pref) {
225 + prefetchw(pref);
226 + prefetchw(pref + EHEA_CACHE_LINE);
227 +
228 + pref = (skb_array[x]->data);
229 + prefetch(pref);
230 + prefetch(pref + EHEA_CACHE_LINE);
231 + prefetch(pref + EHEA_CACHE_LINE * 2);
232 + prefetch(pref + EHEA_CACHE_LINE * 3);
233 + }
234 +
235 skb = skb_array[skb_index];
236 skb_array[skb_index] = NULL;
237 return skb;
238 @@ -553,12 +556,14 @@ static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array,
239 x &= (arr_len - 1);
240
241 pref = skb_array[x];
242 - prefetchw(pref);
243 - prefetchw(pref + EHEA_CACHE_LINE);
244 + if (pref) {
245 + prefetchw(pref);
246 + prefetchw(pref + EHEA_CACHE_LINE);
247
248 - pref = (skb_array[x]->data);
249 - prefetchw(pref);
250 - prefetchw(pref + EHEA_CACHE_LINE);
251 + pref = (skb_array[x]->data);
252 + prefetchw(pref);
253 + prefetchw(pref + EHEA_CACHE_LINE);
254 + }
255
256 skb = skb_array[wqe_index];
257 skb_array[wqe_index] = NULL;
258 diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
259 index 3612607..32e7acb 100644
260 --- a/drivers/serial/mpc52xx_uart.c
261 +++ b/drivers/serial/mpc52xx_uart.c
262 @@ -515,7 +515,7 @@ mpc52xx_uart_startup(struct uart_port *port)
263
264 /* Request IRQ */
265 ret = request_irq(port->irq, mpc52xx_uart_int,
266 - IRQF_DISABLED | IRQF_SAMPLE_RANDOM | IRQF_SHARED,
267 + IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
268 "mpc52xx_psc_uart", port);
269 if (ret)
270 return ret;
271 diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c
272 index 4154be3..58c4d37 100644
273 --- a/drivers/usb/gadget/usbstring.c
274 +++ b/drivers/usb/gadget/usbstring.c
275 @@ -38,7 +38,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
276 uchar = (c & 0x1f) << 6;
277
278 c = (u8) *s++;
279 - if ((c & 0xc0) != 0xc0)
280 + if ((c & 0xc0) != 0x80)
281 goto fail;
282 c &= 0x3f;
283 uchar |= c;
284 @@ -49,13 +49,13 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len)
285 uchar = (c & 0x0f) << 12;
286
287 c = (u8) *s++;
288 - if ((c & 0xc0) != 0xc0)
289 + if ((c & 0xc0) != 0x80)
290 goto fail;
291 c &= 0x3f;
292 uchar |= c << 6;
293
294 c = (u8) *s++;
295 - if ((c & 0xc0) != 0xc0)
296 + if ((c & 0xc0) != 0x80)
297 goto fail;
298 c &= 0x3f;
299 uchar |= c;
300 diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
301 index 4835bdc..d1c3cba 100644
302 --- a/drivers/video/fb_defio.c
303 +++ b/drivers/video/fb_defio.c
304 @@ -70,8 +70,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
305
306 /* vm_ops->page_mkwrite handler */
307 static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
308 - struct page *page)
309 + struct vm_fault *vmf)
310 {
311 + struct page *page = vmf->page;
312 struct fb_info *info = vma->vm_private_data;
313 struct fb_deferred_io *fbdefio = info->fbdefio;
314 struct page *cur;
315 diff --git a/fs/buffer.c b/fs/buffer.c
316 index a5d806d..abe9640 100644
317 --- a/fs/buffer.c
318 +++ b/fs/buffer.c
319 @@ -2402,20 +2402,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
320 * unlock the page.
321 */
322 int
323 -block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
324 +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
325 get_block_t get_block)
326 {
327 + struct page *page = vmf->page;
328 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
329 unsigned long end;
330 loff_t size;
331 - int ret = -EINVAL;
332 + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
333
334 lock_page(page);
335 size = i_size_read(inode);
336 if ((page->mapping != inode->i_mapping) ||
337 (page_offset(page) > size)) {
338 /* page got truncated out from underneath us */
339 - goto out_unlock;
340 + unlock_page(page);
341 + goto out;
342 }
343
344 /* page is wholly or partially inside EOF */
345 @@ -2428,8 +2430,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
346 if (!ret)
347 ret = block_commit_write(page, 0, end);
348
349 -out_unlock:
350 - unlock_page(page);
351 + if (unlikely(ret)) {
352 + unlock_page(page);
353 + if (ret == -ENOMEM)
354 + ret = VM_FAULT_OOM;
355 + else /* -ENOSPC, -EIO, etc */
356 + ret = VM_FAULT_SIGBUS;
357 + } else
358 + ret = VM_FAULT_LOCKED;
359 +
360 +out:
361 return ret;
362 }
363
364 diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
365 index 14eb9a2..604ce8a 100644
366 --- a/fs/cifs/cifs_unicode.h
367 +++ b/fs/cifs/cifs_unicode.h
368 @@ -64,6 +64,13 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
369 #endif
370
371 /*
372 + * To be safe - for UCS to UTF-8 with strings loaded with the rare long
373 + * characters alloc more to account for such multibyte target UTF-8
374 + * characters.
375 + */
376 +#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2)
377 +
378 +/*
379 * UniStrcat: Concatenate the second string to the first
380 *
381 * Returns:
382 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
383 index 9231e0a..cff0c53 100644
384 --- a/fs/cifs/cifssmb.c
385 +++ b/fs/cifs/cifssmb.c
386 @@ -91,23 +91,22 @@ static int
387 cifs_strncpy_to_host(char **dst, const char *src, const int maxlen,
388 const bool is_unicode, const struct nls_table *nls_codepage)
389 {
390 - int plen;
391 + int src_len, dst_len;
392
393 if (is_unicode) {
394 - plen = UniStrnlen((wchar_t *)src, maxlen);
395 - *dst = kmalloc(plen + 2, GFP_KERNEL);
396 + src_len = UniStrnlen((wchar_t *)src, maxlen);
397 + *dst = kmalloc((4 * src_len) + 2, GFP_KERNEL);
398 if (!*dst)
399 goto cifs_strncpy_to_host_ErrExit;
400 - cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage);
401 + dst_len = cifs_strfromUCS_le(*dst, (__le16 *)src, src_len, nls_codepage);
402 + (*dst)[dst_len + 1] = 0;
403 } else {
404 - plen = strnlen(src, maxlen);
405 - *dst = kmalloc(plen + 2, GFP_KERNEL);
406 + src_len = strnlen(src, maxlen);
407 + *dst = kmalloc(src_len + 1, GFP_KERNEL);
408 if (!*dst)
409 goto cifs_strncpy_to_host_ErrExit;
410 - strncpy(*dst, src, plen);
411 + strlcpy(*dst, src, src_len + 1);
412 }
413 - (*dst)[plen] = 0;
414 - (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */
415 return 0;
416
417 cifs_strncpy_to_host_ErrExit:
418 diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
419 index 21a1abf..d059b3f 100644
420 --- a/fs/cifs/connect.c
421 +++ b/fs/cifs/connect.c
422 @@ -3549,16 +3549,12 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
423 BCC(smb_buffer_response)) {
424 kfree(tcon->nativeFileSystem);
425 tcon->nativeFileSystem =
426 - kzalloc(2*(length + 1), GFP_KERNEL);
427 + kzalloc((4 * length) + 2, GFP_KERNEL);
428 if (tcon->nativeFileSystem)
429 cifs_strfromUCS_le(
430 tcon->nativeFileSystem,
431 (__le16 *) bcc_ptr,
432 length, nls_codepage);
433 - bcc_ptr += 2 * length;
434 - bcc_ptr[0] = 0; /* null terminate the string */
435 - bcc_ptr[1] = 0;
436 - bcc_ptr += 2;
437 }
438 /* else do not bother copying these information fields*/
439 } else {
440 diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
441 index b891553..6205593 100644
442 --- a/fs/cifs/misc.c
443 +++ b/fs/cifs/misc.c
444 @@ -685,14 +685,15 @@ cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
445 NLS_MAX_CHARSET_SIZE);
446 if (len > 0) {
447 j += len;
448 - continue;
449 + goto overrun_chk;
450 } else {
451 target[j] = '?';
452 }
453 }
454 j++;
455 /* make sure we do not overrun callers allocated temp buffer */
456 - if (j >= (2 * NAME_MAX))
457 +overrun_chk:
458 + if (j >= UNICODE_NAME_MAX)
459 break;
460 }
461 cUCS_out:
462 diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
463 index 58d5729..2878892 100644
464 --- a/fs/cifs/readdir.c
465 +++ b/fs/cifs/readdir.c
466 @@ -1075,7 +1075,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
467 with the rare long characters alloc more to account for
468 such multibyte target UTF-8 characters. cifs_unicode.c,
469 which actually does the conversion, has the same limit */
470 - tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL);
471 + tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
472 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
473 if (current_entry == NULL) {
474 /* evaluate whether this case is an error */
475 diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
476 index 89fac77..3890cc2 100644
477 --- a/fs/cifs/sess.c
478 +++ b/fs/cifs/sess.c
479 @@ -202,27 +202,26 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
480 int words_left, len;
481 char *data = *pbcc_area;
482
483 -
484 -
485 cFYI(1, ("bleft %d", bleft));
486
487 -
488 - /* SMB header is unaligned, so cifs servers word align start of
489 - Unicode strings */
490 - data++;
491 - bleft--; /* Windows servers do not always double null terminate
492 - their final Unicode string - in which case we
493 - now will not attempt to decode the byte of junk
494 - which follows it */
495 + /*
496 + * Windows servers do not always double null terminate their final
497 + * Unicode string. Check to see if there are an uneven number of bytes
498 + * left. If so, then add an extra NULL pad byte to the end of the
499 + * response.
500 + *
501 + * See section 2.7.2 in "Implementing CIFS" for details
502 + */
503 + if (bleft % 2) {
504 + data[bleft] = 0;
505 + ++bleft;
506 + }
507
508 words_left = bleft / 2;
509
510 /* save off server operating system */
511 len = UniStrnlen((wchar_t *) data, words_left);
512
513 -/* We look for obvious messed up bcc or strings in response so we do not go off
514 - the end since (at least) WIN2K and Windows XP have a major bug in not null
515 - terminating last Unicode string in response */
516 if (len >= words_left)
517 return rc;
518
519 @@ -260,13 +259,10 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
520 return rc;
521
522 kfree(ses->serverDomain);
523 - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
524 - if (ses->serverDomain != NULL) {
525 + ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL);
526 + if (ses->serverDomain != NULL)
527 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
528 nls_cp);
529 - ses->serverDomain[2*len] = 0;
530 - ses->serverDomain[(2*len) + 1] = 0;
531 - }
532 data += 2 * (len + 1);
533 words_left -= len + 1;
534
535 @@ -616,12 +612,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
536 }
537
538 /* BB check if Unicode and decode strings */
539 - if (smb_buf->Flags2 & SMBFLG2_UNICODE)
540 + if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
541 + /* unicode string area must be word-aligned */
542 + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
543 + ++bcc_ptr;
544 + --bytes_remaining;
545 + }
546 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining,
547 - ses, nls_cp);
548 - else
549 + ses, nls_cp);
550 + } else {
551 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
552 ses, nls_cp);
553 + }
554
555 ssetup_exit:
556 if (spnego_key)
557 diff --git a/fs/eventpoll.c b/fs/eventpoll.c
558 index 801de2c..fd5835b 100644
559 --- a/fs/eventpoll.c
560 +++ b/fs/eventpoll.c
561 @@ -1132,7 +1132,7 @@ error_return:
562
563 SYSCALL_DEFINE1(epoll_create, int, size)
564 {
565 - if (size < 0)
566 + if (size <= 0)
567 return -EINVAL;
568
569 return sys_epoll_create1(0);
570 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
571 index f613d57..eadbee3 100644
572 --- a/fs/ext4/ext4.h
573 +++ b/fs/ext4/ext4.h
574 @@ -1084,7 +1084,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
575 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
576 extern int ext4_block_truncate_page(handle_t *handle,
577 struct address_space *mapping, loff_t from);
578 -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
579 +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
580
581 /* ioctl.c */
582 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
583 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
584 index b233ade..63b911b 100644
585 --- a/fs/ext4/inode.c
586 +++ b/fs/ext4/inode.c
587 @@ -4861,8 +4861,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
588 return !buffer_mapped(bh);
589 }
590
591 -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
592 +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
593 {
594 + struct page *page = vmf->page;
595 loff_t size;
596 unsigned long len;
597 int ret = -EINVAL;
598 @@ -4913,6 +4914,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
599 goto out_unlock;
600 ret = 0;
601 out_unlock:
602 + if (ret)
603 + ret = VM_FAULT_SIGBUS;
604 up_read(&inode->i_alloc_sem);
605 return ret;
606 }
607 diff --git a/fs/fcntl.c b/fs/fcntl.c
608 index 08a109b..ac79b7e 100644
609 --- a/fs/fcntl.c
610 +++ b/fs/fcntl.c
611 @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
612 {
613 if (unlikely(newfd == oldfd)) { /* corner case */
614 struct files_struct *files = current->files;
615 + int retval = oldfd;
616 +
617 rcu_read_lock();
618 if (!fcheck_files(files, oldfd))
619 - oldfd = -EBADF;
620 + retval = -EBADF;
621 rcu_read_unlock();
622 - return oldfd;
623 + return retval;
624 }
625 return sys_dup3(oldfd, newfd, 0);
626 }
627 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
628 index 3ada9d7..0c92f15 100644
629 --- a/fs/fuse/file.c
630 +++ b/fs/fuse/file.c
631 @@ -1219,8 +1219,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
632 * - sync(2)
633 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
634 */
635 -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
636 +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
637 {
638 + struct page *page = vmf->page;
639 /*
640 * Don't use page->mapping as it may become NULL from a
641 * concurrent truncate.
642 diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
643 index e9a366d..641c43b 100644
644 --- a/fs/gfs2/ops_file.c
645 +++ b/fs/gfs2/ops_file.c
646 @@ -338,8 +338,9 @@ static int gfs2_allocate_page_backing(struct page *page)
647 * blocks allocated on disk to back that page.
648 */
649
650 -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
651 +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
652 {
653 + struct page *page = vmf->page;
654 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
655 struct gfs2_inode *ip = GFS2_I(inode);
656 struct gfs2_sbd *sdp = GFS2_SB(inode);
657 @@ -411,6 +412,10 @@ out_unlock:
658 gfs2_glock_dq(&gh);
659 out:
660 gfs2_holder_uninit(&gh);
661 + if (ret == -ENOMEM)
662 + ret = VM_FAULT_OOM;
663 + else if (ret)
664 + ret = VM_FAULT_SIGBUS;
665 return ret;
666 }
667
668 diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
669 index 74f92b7..bff8733 100644
670 --- a/fs/nfs/dir.c
671 +++ b/fs/nfs/dir.c
672 @@ -1613,8 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
673 } else if (atomic_read(&new_dentry->d_count) > 1)
674 /* dentry still busy? */
675 goto out;
676 - } else
677 - nfs_drop_nlink(new_inode);
678 + }
679
680 go_ahead:
681 /*
682 @@ -1627,10 +1626,8 @@ go_ahead:
683 }
684 nfs_inode_return_delegation(old_inode);
685
686 - if (new_inode != NULL) {
687 + if (new_inode != NULL)
688 nfs_inode_return_delegation(new_inode);
689 - d_delete(new_dentry);
690 - }
691
692 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
693 new_dir, &new_dentry->d_name);
694 @@ -1639,6 +1636,8 @@ out:
695 if (rehash)
696 d_rehash(rehash);
697 if (!error) {
698 + if (new_inode != NULL)
699 + nfs_drop_nlink(new_inode);
700 d_move(old_dentry, new_dentry);
701 nfs_set_verifier(new_dentry,
702 nfs_save_change_attribute(new_dir));
703 diff --git a/fs/nfs/file.c b/fs/nfs/file.c
704 index 30541f0..4a57a0f 100644
705 --- a/fs/nfs/file.c
706 +++ b/fs/nfs/file.c
707 @@ -448,8 +448,9 @@ const struct address_space_operations nfs_file_aops = {
708 .launder_page = nfs_launder_page,
709 };
710
711 -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
712 +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
713 {
714 + struct page *page = vmf->page;
715 struct file *filp = vma->vm_file;
716 struct dentry *dentry = filp->f_path.dentry;
717 unsigned pagelen;
718 @@ -476,11 +477,11 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
719 goto out_unlock;
720
721 ret = nfs_updatepage(filp, page, 0, pagelen);
722 - if (ret == 0)
723 - ret = pagelen;
724 out_unlock:
725 + if (!ret)
726 + return VM_FAULT_LOCKED;
727 unlock_page(page);
728 - return ret;
729 + return VM_FAULT_SIGBUS;
730 }
731
732 static struct vm_operations_struct nfs_file_vm_ops = {
733 diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
734 index b0b07df..abffc90 100644
735 --- a/fs/nfsd/nfs4xdr.c
736 +++ b/fs/nfsd/nfs4xdr.c
737 @@ -1833,6 +1833,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
738 dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
739 if (IS_ERR(dentry))
740 return nfserrno(PTR_ERR(dentry));
741 + if (!dentry->d_inode) {
742 + /*
743 + * nfsd_buffered_readdir drops the i_mutex between
744 + * readdir and calling this callback, leaving a window
745 + * where this directory entry could have gone away.
746 + */
747 + dput(dentry);
748 + return nfserr_noent;
749 + }
750
751 exp_get(exp);
752 /*
753 @@ -1895,6 +1904,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
754 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
755 int buflen;
756 __be32 *p = cd->buffer;
757 + __be32 *cookiep;
758 __be32 nfserr = nfserr_toosmall;
759
760 /* In nfsv4, "." and ".." never make it onto the wire.. */
761 @@ -1911,7 +1921,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
762 goto fail;
763
764 *p++ = xdr_one; /* mark entry present */
765 - cd->offset = p; /* remember pointer */
766 + cookiep = p;
767 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
768 p = xdr_encode_array(p, name, namlen); /* name length & name */
769
770 @@ -1925,6 +1935,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
771 goto fail;
772 case nfserr_dropit:
773 goto fail;
774 + case nfserr_noent:
775 + goto skip_entry;
776 default:
777 /*
778 * If the client requested the RDATTR_ERROR attribute,
779 @@ -1943,6 +1955,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
780 }
781 cd->buflen -= (p - cd->buffer);
782 cd->buffer = p;
783 + cd->offset = cookiep;
784 +skip_entry:
785 cd->common.err = nfs_ok;
786 return 0;
787 fail:
788 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
789 index 6f7ea0a..08af0ed 100644
790 --- a/fs/ocfs2/file.c
791 +++ b/fs/ocfs2/file.c
792 @@ -2075,6 +2075,22 @@ out_sems:
793 return written ? written : ret;
794 }
795
796 +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
797 + struct file *out,
798 + struct splice_desc *sd)
799 +{
800 + int ret;
801 +
802 + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
803 + sd->total_len, 0, NULL);
804 + if (ret < 0) {
805 + mlog_errno(ret);
806 + return ret;
807 + }
808 +
809 + return splice_from_pipe_feed(pipe, sd, pipe_to_file);
810 +}
811 +
812 static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
813 struct file *out,
814 loff_t *ppos,
815 @@ -2082,38 +2098,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
816 unsigned int flags)
817 {
818 int ret;
819 - struct inode *inode = out->f_path.dentry->d_inode;
820 + struct address_space *mapping = out->f_mapping;
821 + struct inode *inode = mapping->host;
822 + struct splice_desc sd = {
823 + .total_len = len,
824 + .flags = flags,
825 + .pos = *ppos,
826 + .u.file = out,
827 + };
828
829 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
830 (unsigned int)len,
831 out->f_path.dentry->d_name.len,
832 out->f_path.dentry->d_name.name);
833
834 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
835 + if (pipe->inode)
836 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
837
838 - ret = ocfs2_rw_lock(inode, 1);
839 - if (ret < 0) {
840 - mlog_errno(ret);
841 - goto out;
842 - }
843 + splice_from_pipe_begin(&sd);
844 + do {
845 + ret = splice_from_pipe_next(pipe, &sd);
846 + if (ret <= 0)
847 + break;
848
849 - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
850 - NULL);
851 - if (ret < 0) {
852 - mlog_errno(ret);
853 - goto out_unlock;
854 - }
855 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
856 + ret = ocfs2_rw_lock(inode, 1);
857 + if (ret < 0)
858 + mlog_errno(ret);
859 + else {
860 + ret = ocfs2_splice_to_file(pipe, out, &sd);
861 + ocfs2_rw_unlock(inode, 1);
862 + }
863 + mutex_unlock(&inode->i_mutex);
864 + } while (ret > 0);
865 + splice_from_pipe_end(pipe, &sd);
866
867 if (pipe->inode)
868 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
869 - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
870 - if (pipe->inode)
871 mutex_unlock(&pipe->inode->i_mutex);
872
873 -out_unlock:
874 - ocfs2_rw_unlock(inode, 1);
875 -out:
876 - mutex_unlock(&inode->i_mutex);
877 + if (sd.num_spliced)
878 + ret = sd.num_spliced;
879 +
880 + if (ret > 0) {
881 + unsigned long nr_pages;
882 +
883 + *ppos += ret;
884 + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
885 +
886 + /*
887 + * If file or inode is SYNC and we actually wrote some data,
888 + * sync it.
889 + */
890 + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
891 + int err;
892 +
893 + mutex_lock(&inode->i_mutex);
894 + err = ocfs2_rw_lock(inode, 1);
895 + if (err < 0) {
896 + mlog_errno(err);
897 + } else {
898 + err = generic_osync_inode(inode, mapping,
899 + OSYNC_METADATA|OSYNC_DATA);
900 + ocfs2_rw_unlock(inode, 1);
901 + }
902 + mutex_unlock(&inode->i_mutex);
903 +
904 + if (err)
905 + ret = err;
906 + }
907 + balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
908 + }
909
910 mlog_exit(ret);
911 return ret;
912 diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
913 index 3dc18d6..2383cbd 100644
914 --- a/fs/ocfs2/mmap.c
915 +++ b/fs/ocfs2/mmap.c
916 @@ -150,8 +150,9 @@ out:
917 return ret;
918 }
919
920 -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
921 +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
922 {
923 + struct page *page = vmf->page;
924 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
925 struct buffer_head *di_bh = NULL;
926 sigset_t blocked, oldset;
927 @@ -192,7 +193,8 @@ out:
928 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
929 if (ret2 < 0)
930 mlog_errno(ret2);
931 -
932 + if (ret)
933 + ret = VM_FAULT_SIGBUS;
934 return ret;
935 }
936
937 diff --git a/fs/splice.c b/fs/splice.c
938 index aea1eb4..2f2d8c1 100644
939 --- a/fs/splice.c
940 +++ b/fs/splice.c
941 @@ -553,8 +553,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
942 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
943 * a new page in the output file page cache and fill/dirty that.
944 */
945 -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
946 - struct splice_desc *sd)
947 +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
948 + struct splice_desc *sd)
949 {
950 struct file *file = sd->u.file;
951 struct address_space *mapping = file->f_mapping;
952 @@ -598,108 +598,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
953 out:
954 return ret;
955 }
956 +EXPORT_SYMBOL(pipe_to_file);
957 +
958 +static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
959 +{
960 + smp_mb();
961 + if (waitqueue_active(&pipe->wait))
962 + wake_up_interruptible(&pipe->wait);
963 + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
964 +}
965
966 /**
967 - * __splice_from_pipe - splice data from a pipe to given actor
968 + * splice_from_pipe_feed - feed available data from a pipe to a file
969 * @pipe: pipe to splice from
970 * @sd: information to @actor
971 * @actor: handler that splices the data
972 *
973 * Description:
974 - * This function does little more than loop over the pipe and call
975 - * @actor to do the actual moving of a single struct pipe_buffer to
976 - * the desired destination. See pipe_to_file, pipe_to_sendpage, or
977 - * pipe_to_user.
978 + *
979 + * This function loops over the pipe and calls @actor to do the
980 + * actual moving of a single struct pipe_buffer to the desired
981 + * destination. It returns when there are no more buffers left in
982 + * the pipe or if the requested number of bytes (@sd->total_len)
983 + * have been copied. It returns a positive number (one) if the
984 + * pipe needs to be filled with more data, zero if the required
985 + * number of bytes have been copied and -errno on error.
986 *
987 + * This, together with splice_from_pipe_{begin,end,next}, may be
988 + * used to implement the functionality of __splice_from_pipe() when
989 + * locking is required around copying the pipe buffers to the
990 + * destination.
991 */
992 -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
993 - splice_actor *actor)
994 +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
995 + splice_actor *actor)
996 {
997 - int ret, do_wakeup, err;
998 -
999 - ret = 0;
1000 - do_wakeup = 0;
1001 -
1002 - for (;;) {
1003 - if (pipe->nrbufs) {
1004 - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1005 - const struct pipe_buf_operations *ops = buf->ops;
1006 + int ret;
1007
1008 - sd->len = buf->len;
1009 - if (sd->len > sd->total_len)
1010 - sd->len = sd->total_len;
1011 + while (pipe->nrbufs) {
1012 + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1013 + const struct pipe_buf_operations *ops = buf->ops;
1014
1015 - err = actor(pipe, buf, sd);
1016 - if (err <= 0) {
1017 - if (!ret && err != -ENODATA)
1018 - ret = err;
1019 + sd->len = buf->len;
1020 + if (sd->len > sd->total_len)
1021 + sd->len = sd->total_len;
1022
1023 - break;
1024 - }
1025 + ret = actor(pipe, buf, sd);
1026 + if (ret <= 0) {
1027 + if (ret == -ENODATA)
1028 + ret = 0;
1029 + return ret;
1030 + }
1031 + buf->offset += ret;
1032 + buf->len -= ret;
1033
1034 - ret += err;
1035 - buf->offset += err;
1036 - buf->len -= err;
1037 + sd->num_spliced += ret;
1038 + sd->len -= ret;
1039 + sd->pos += ret;
1040 + sd->total_len -= ret;
1041
1042 - sd->len -= err;
1043 - sd->pos += err;
1044 - sd->total_len -= err;
1045 - if (sd->len)
1046 - continue;
1047 + if (!buf->len) {
1048 + buf->ops = NULL;
1049 + ops->release(pipe, buf);
1050 + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1051 + pipe->nrbufs--;
1052 + if (pipe->inode)
1053 + sd->need_wakeup = true;
1054 + }
1055
1056 - if (!buf->len) {
1057 - buf->ops = NULL;
1058 - ops->release(pipe, buf);
1059 - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
1060 - pipe->nrbufs--;
1061 - if (pipe->inode)
1062 - do_wakeup = 1;
1063 - }
1064 + if (!sd->total_len)
1065 + return 0;
1066 + }
1067
1068 - if (!sd->total_len)
1069 - break;
1070 - }
1071 + return 1;
1072 +}
1073 +EXPORT_SYMBOL(splice_from_pipe_feed);
1074
1075 - if (pipe->nrbufs)
1076 - continue;
1077 +/**
1078 + * splice_from_pipe_next - wait for some data to splice from
1079 + * @pipe: pipe to splice from
1080 + * @sd: information about the splice operation
1081 + *
1082 + * Description:
1083 + * This function will wait for some data and return a positive
1084 + * value (one) if pipe buffers are available. It will return zero
1085 + * or -errno if no more data needs to be spliced.
1086 + */
1087 +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
1088 +{
1089 + while (!pipe->nrbufs) {
1090 if (!pipe->writers)
1091 - break;
1092 - if (!pipe->waiting_writers) {
1093 - if (ret)
1094 - break;
1095 - }
1096 + return 0;
1097
1098 - if (sd->flags & SPLICE_F_NONBLOCK) {
1099 - if (!ret)
1100 - ret = -EAGAIN;
1101 - break;
1102 - }
1103 + if (!pipe->waiting_writers && sd->num_spliced)
1104 + return 0;
1105
1106 - if (signal_pending(current)) {
1107 - if (!ret)
1108 - ret = -ERESTARTSYS;
1109 - break;
1110 - }
1111 + if (sd->flags & SPLICE_F_NONBLOCK)
1112 + return -EAGAIN;
1113
1114 - if (do_wakeup) {
1115 - smp_mb();
1116 - if (waitqueue_active(&pipe->wait))
1117 - wake_up_interruptible_sync(&pipe->wait);
1118 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1119 - do_wakeup = 0;
1120 + if (signal_pending(current))
1121 + return -ERESTARTSYS;
1122 +
1123 + if (sd->need_wakeup) {
1124 + wakeup_pipe_writers(pipe);
1125 + sd->need_wakeup = false;
1126 }
1127
1128 pipe_wait(pipe);
1129 }
1130
1131 - if (do_wakeup) {
1132 - smp_mb();
1133 - if (waitqueue_active(&pipe->wait))
1134 - wake_up_interruptible(&pipe->wait);
1135 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
1136 - }
1137 + return 1;
1138 +}
1139 +EXPORT_SYMBOL(splice_from_pipe_next);
1140
1141 - return ret;
1142 +/**
1143 + * splice_from_pipe_begin - start splicing from pipe
1144 + * @sd: information about the splice operation
1145 + *
1146 + * Description:
1147 + * This function should be called before a loop containing
1148 + * splice_from_pipe_next() and splice_from_pipe_feed() to
1149 + * initialize the necessary fields of @sd.
1150 + */
1151 +void splice_from_pipe_begin(struct splice_desc *sd)
1152 +{
1153 + sd->num_spliced = 0;
1154 + sd->need_wakeup = false;
1155 +}
1156 +EXPORT_SYMBOL(splice_from_pipe_begin);
1157 +
1158 +/**
1159 + * splice_from_pipe_end - finish splicing from pipe
1160 + * @pipe: pipe to splice from
1161 + * @sd: information about the splice operation
1162 + *
1163 + * Description:
1164 + * This function will wake up pipe writers if necessary. It should
1165 + * be called after a loop containing splice_from_pipe_next() and
1166 + * splice_from_pipe_feed().
1167 + */
1168 +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
1169 +{
1170 + if (sd->need_wakeup)
1171 + wakeup_pipe_writers(pipe);
1172 +}
1173 +EXPORT_SYMBOL(splice_from_pipe_end);
1174 +
1175 +/**
1176 + * __splice_from_pipe - splice data from a pipe to given actor
1177 + * @pipe: pipe to splice from
1178 + * @sd: information to @actor
1179 + * @actor: handler that splices the data
1180 + *
1181 + * Description:
1182 + * This function does little more than loop over the pipe and call
1183 + * @actor to do the actual moving of a single struct pipe_buffer to
1184 + * the desired destination. See pipe_to_file, pipe_to_sendpage, or
1185 + * pipe_to_user.
1186 + *
1187 + */
1188 +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
1189 + splice_actor *actor)
1190 +{
1191 + int ret;
1192 +
1193 + splice_from_pipe_begin(sd);
1194 + do {
1195 + ret = splice_from_pipe_next(pipe, sd);
1196 + if (ret > 0)
1197 + ret = splice_from_pipe_feed(pipe, sd, actor);
1198 + } while (ret > 0);
1199 + splice_from_pipe_end(pipe, sd);
1200 +
1201 + return sd->num_spliced ? sd->num_spliced : ret;
1202 }
1203 EXPORT_SYMBOL(__splice_from_pipe);
1204
1205 @@ -713,7 +783,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
1206 * @actor: handler that splices the data
1207 *
1208 * Description:
1209 - * See __splice_from_pipe. This function locks the input and output inodes,
1210 + * See __splice_from_pipe. This function locks the pipe inode,
1211 * otherwise it's identical to __splice_from_pipe().
1212 *
1213 */
1214 @@ -722,7 +792,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1215 splice_actor *actor)
1216 {
1217 ssize_t ret;
1218 - struct inode *inode = out->f_mapping->host;
1219 struct splice_desc sd = {
1220 .total_len = len,
1221 .flags = flags,
1222 @@ -730,24 +799,11 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
1223 .u.file = out,
1224 };
1225
1226 - /*
1227 - * The actor worker might be calling ->prepare_write and
1228 - * ->commit_write. Most of the time, these expect i_mutex to
1229 - * be held. Since this may result in an ABBA deadlock with
1230 - * pipe->inode, we have to order lock acquiry here.
1231 - *
1232 - * Outer lock must be inode->i_mutex, as pipe_wait() will
1233 - * release and reacquire pipe->inode->i_mutex, AND inode must
1234 - * never be a pipe.
1235 - */
1236 - WARN_ON(S_ISFIFO(inode->i_mode));
1237 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1238 if (pipe->inode)
1239 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1240 + mutex_lock(&pipe->inode->i_mutex);
1241 ret = __splice_from_pipe(pipe, &sd, actor);
1242 if (pipe->inode)
1243 mutex_unlock(&pipe->inode->i_mutex);
1244 - mutex_unlock(&inode->i_mutex);
1245
1246 return ret;
1247 }
1248 @@ -838,17 +894,29 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1249 };
1250 ssize_t ret;
1251
1252 - WARN_ON(S_ISFIFO(inode->i_mode));
1253 - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1254 - ret = file_remove_suid(out);
1255 - if (likely(!ret)) {
1256 - if (pipe->inode)
1257 - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1258 - ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
1259 - if (pipe->inode)
1260 - mutex_unlock(&pipe->inode->i_mutex);
1261 - }
1262 - mutex_unlock(&inode->i_mutex);
1263 + if (pipe->inode)
1264 + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
1265 +
1266 + splice_from_pipe_begin(&sd);
1267 + do {
1268 + ret = splice_from_pipe_next(pipe, &sd);
1269 + if (ret <= 0)
1270 + break;
1271 +
1272 + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1273 + ret = file_remove_suid(out);
1274 + if (!ret)
1275 + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
1276 + mutex_unlock(&inode->i_mutex);
1277 + } while (ret > 0);
1278 + splice_from_pipe_end(pipe, &sd);
1279 +
1280 + if (pipe->inode)
1281 + mutex_unlock(&pipe->inode->i_mutex);
1282 +
1283 + if (sd.num_spliced)
1284 + ret = sd.num_spliced;
1285 +
1286 if (ret > 0) {
1287 unsigned long nr_pages;
1288
1289 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
1290 index 40033dc..82b1c4a 100644
1291 --- a/fs/ubifs/file.c
1292 +++ b/fs/ubifs/file.c
1293 @@ -1140,8 +1140,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1294 * mmap()d file has taken write protection fault and is being made
1295 * writable. UBIFS must ensure page is budgeted for.
1296 */
1297 -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1298 +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1299 {
1300 + struct page *page = vmf->page;
1301 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1302 struct ubifs_info *c = inode->i_sb->s_fs_info;
1303 struct timespec now = ubifs_current_time(inode);
1304 @@ -1153,7 +1154,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1305 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1306
1307 if (unlikely(c->ro_media))
1308 - return -EROFS;
1309 + return VM_FAULT_SIGBUS; /* -EROFS */
1310
1311 /*
1312 * We have not locked @page so far so we may budget for changing the
1313 @@ -1186,7 +1187,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1314 if (err == -ENOSPC)
1315 ubifs_warn("out of space for mmapped file "
1316 "(inode number %lu)", inode->i_ino);
1317 - return err;
1318 + return VM_FAULT_SIGBUS;
1319 }
1320
1321 lock_page(page);
1322 @@ -1226,6 +1227,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1323 out_unlock:
1324 unlock_page(page);
1325 ubifs_release_budget(c, &req);
1326 + if (err)
1327 + err = VM_FAULT_SIGBUS;
1328 return err;
1329 }
1330
1331 diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
1332 index 5311c1a..469502c 100644
1333 --- a/fs/xfs/linux-2.6/xfs_file.c
1334 +++ b/fs/xfs/linux-2.6/xfs_file.c
1335 @@ -427,9 +427,9 @@ xfs_file_ioctl_invis(
1336 STATIC int
1337 xfs_vm_page_mkwrite(
1338 struct vm_area_struct *vma,
1339 - struct page *page)
1340 + struct vm_fault *vmf)
1341 {
1342 - return block_page_mkwrite(vma, page, xfs_get_blocks);
1343 + return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1344 }
1345
1346 const struct file_operations xfs_file_operations = {
1347 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
1348 index eadaab4..657c072 100644
1349 --- a/include/linux/buffer_head.h
1350 +++ b/include/linux/buffer_head.h
1351 @@ -222,7 +222,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t,
1352 get_block_t *, loff_t *);
1353 int generic_cont_expand_simple(struct inode *inode, loff_t size);
1354 int block_commit_write(struct page *page, unsigned from, unsigned to);
1355 -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
1356 +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
1357 get_block_t get_block);
1358 void block_sync_page(struct page *);
1359 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
1360 diff --git a/include/linux/mm.h b/include/linux/mm.h
1361 index 2a75579..ae9775d 100644
1362 --- a/include/linux/mm.h
1363 +++ b/include/linux/mm.h
1364 @@ -138,6 +138,7 @@ extern pgprot_t protection_map[16];
1365
1366 #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
1367 #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
1368 +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
1369
1370
1371 /*
1372 @@ -173,7 +174,7 @@ struct vm_operations_struct {
1373
1374 /* notification that a previously read-only page is about to become
1375 * writable, if an error is returned it will cause a SIGBUS */
1376 - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
1377 + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
1378
1379 /* called by access_process_vm when get_user_pages() fails, typically
1380 * for use by special VMAs that can switch between memory and hardware
1381 diff --git a/include/linux/splice.h b/include/linux/splice.h
1382 index 528dcb9..5f3faa9 100644
1383 --- a/include/linux/splice.h
1384 +++ b/include/linux/splice.h
1385 @@ -36,6 +36,8 @@ struct splice_desc {
1386 void *data; /* cookie */
1387 } u;
1388 loff_t pos; /* file position */
1389 + size_t num_spliced; /* number of bytes already spliced */
1390 + bool need_wakeup; /* need to wake up writer */
1391 };
1392
1393 struct partial_page {
1394 @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
1395 splice_actor *);
1396 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
1397 struct splice_desc *, splice_actor *);
1398 +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
1399 + splice_actor *);
1400 +extern int splice_from_pipe_next(struct pipe_inode_info *,
1401 + struct splice_desc *);
1402 +extern void splice_from_pipe_begin(struct splice_desc *);
1403 +extern void splice_from_pipe_end(struct pipe_inode_info *,
1404 + struct splice_desc *);
1405 +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
1406 + struct splice_desc *);
1407 +
1408 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
1409 struct splice_pipe_desc *);
1410 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
1411 diff --git a/mm/memory.c b/mm/memory.c
1412 index 1002f47..3856c36 100644
1413 --- a/mm/memory.c
1414 +++ b/mm/memory.c
1415 @@ -1801,6 +1801,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1416 * get_user_pages(.write=1, .force=1).
1417 */
1418 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
1419 + struct vm_fault vmf;
1420 + int tmp;
1421 +
1422 + vmf.virtual_address = (void __user *)(address &
1423 + PAGE_MASK);
1424 + vmf.pgoff = old_page->index;
1425 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1426 + vmf.page = old_page;
1427 +
1428 /*
1429 * Notify the address space that the page is about to
1430 * become writable so that it can prohibit this or wait
1431 @@ -1812,8 +1821,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1432 page_cache_get(old_page);
1433 pte_unmap_unlock(page_table, ptl);
1434
1435 - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
1436 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1437 + if (unlikely(tmp &
1438 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1439 + ret = tmp;
1440 goto unwritable_page;
1441 + }
1442 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1443 + lock_page(old_page);
1444 + if (!old_page->mapping) {
1445 + ret = 0; /* retry the fault */
1446 + unlock_page(old_page);
1447 + goto unwritable_page;
1448 + }
1449 + } else
1450 + VM_BUG_ON(!PageLocked(old_page));
1451
1452 /*
1453 * Since we dropped the lock we need to revalidate
1454 @@ -1823,9 +1845,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1455 */
1456 page_table = pte_offset_map_lock(mm, pmd, address,
1457 &ptl);
1458 - page_cache_release(old_page);
1459 - if (!pte_same(*page_table, orig_pte))
1460 + if (!pte_same(*page_table, orig_pte)) {
1461 + unlock_page(old_page);
1462 + page_cache_release(old_page);
1463 goto unlock;
1464 + }
1465
1466 page_mkwrite = 1;
1467 }
1468 @@ -1930,9 +1954,6 @@ gotten:
1469 unlock:
1470 pte_unmap_unlock(page_table, ptl);
1471 if (dirty_page) {
1472 - if (vma->vm_file)
1473 - file_update_time(vma->vm_file);
1474 -
1475 /*
1476 * Yes, Virginia, this is actually required to prevent a race
1477 * with clear_page_dirty_for_io() from clearing the page dirty
1478 @@ -1941,21 +1962,46 @@ unlock:
1479 *
1480 * do_no_page is protected similarly.
1481 */
1482 - wait_on_page_locked(dirty_page);
1483 - set_page_dirty_balance(dirty_page, page_mkwrite);
1484 + if (!page_mkwrite) {
1485 + wait_on_page_locked(dirty_page);
1486 + set_page_dirty_balance(dirty_page, page_mkwrite);
1487 + }
1488 put_page(dirty_page);
1489 + if (page_mkwrite) {
1490 + struct address_space *mapping = dirty_page->mapping;
1491 +
1492 + set_page_dirty(dirty_page);
1493 + unlock_page(dirty_page);
1494 + page_cache_release(dirty_page);
1495 + if (mapping) {
1496 + /*
1497 + * Some device drivers do not set page.mapping
1498 + * but still dirty their pages
1499 + */
1500 + balance_dirty_pages_ratelimited(mapping);
1501 + }
1502 + }
1503 +
1504 + /* file_update_time outside page_lock */
1505 + if (vma->vm_file)
1506 + file_update_time(vma->vm_file);
1507 }
1508 return ret;
1509 oom_free_new:
1510 page_cache_release(new_page);
1511 oom:
1512 - if (old_page)
1513 + if (old_page) {
1514 + if (page_mkwrite) {
1515 + unlock_page(old_page);
1516 + page_cache_release(old_page);
1517 + }
1518 page_cache_release(old_page);
1519 + }
1520 return VM_FAULT_OOM;
1521
1522 unwritable_page:
1523 page_cache_release(old_page);
1524 - return VM_FAULT_SIGBUS;
1525 + return ret;
1526 }
1527
1528 /*
1529 @@ -2472,25 +2518,25 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1530 * to become writable
1531 */
1532 if (vma->vm_ops->page_mkwrite) {
1533 + int tmp;
1534 +
1535 unlock_page(page);
1536 - if (vma->vm_ops->page_mkwrite(vma, page) < 0) {
1537 - ret = VM_FAULT_SIGBUS;
1538 - anon = 1; /* no anon but release vmf.page */
1539 - goto out_unlocked;
1540 - }
1541 - lock_page(page);
1542 - /*
1543 - * XXX: this is not quite right (racy vs
1544 - * invalidate) to unlock and relock the page
1545 - * like this, however a better fix requires
1546 - * reworking page_mkwrite locking API, which
1547 - * is better done later.
1548 - */
1549 - if (!page->mapping) {
1550 - ret = 0;
1551 - anon = 1; /* no anon but release vmf.page */
1552 - goto out;
1553 + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
1554 + tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
1555 + if (unlikely(tmp &
1556 + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
1557 + ret = tmp;
1558 + goto unwritable_page;
1559 }
1560 + if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
1561 + lock_page(page);
1562 + if (!page->mapping) {
1563 + ret = 0; /* retry the fault */
1564 + unlock_page(page);
1565 + goto unwritable_page;
1566 + }
1567 + } else
1568 + VM_BUG_ON(!PageLocked(page));
1569 page_mkwrite = 1;
1570 }
1571 }
1572 @@ -2547,19 +2593,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
1573 pte_unmap_unlock(page_table, ptl);
1574
1575 out:
1576 - unlock_page(vmf.page);
1577 -out_unlocked:
1578 - if (anon)
1579 - page_cache_release(vmf.page);
1580 - else if (dirty_page) {
1581 - if (vma->vm_file)
1582 - file_update_time(vma->vm_file);
1583 + if (dirty_page) {
1584 + struct address_space *mapping = page->mapping;
1585
1586 - set_page_dirty_balance(dirty_page, page_mkwrite);
1587 + if (set_page_dirty(dirty_page))
1588 + page_mkwrite = 1;
1589 + unlock_page(dirty_page);
1590 put_page(dirty_page);
1591 + if (page_mkwrite && mapping) {
1592 + /*
1593 + * Some device drivers do not set page.mapping but still
1594 + * dirty their pages
1595 + */
1596 + balance_dirty_pages_ratelimited(mapping);
1597 + }
1598 +
1599 + /* file_update_time outside page_lock */
1600 + if (vma->vm_file)
1601 + file_update_time(vma->vm_file);
1602 + } else {
1603 + unlock_page(vmf.page);
1604 + if (anon)
1605 + page_cache_release(vmf.page);
1606 }
1607
1608 return ret;
1609 +
1610 +unwritable_page:
1611 + page_cache_release(page);
1612 + return ret;
1613 }
1614
1615 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,