Contents of /trunk/kernel26-alx/patches-2.6.17-r6/0028-2.6.17-fs-fcache-v2.1.patch
Revision 199
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 46406 byte(s)
-import
1 | A frontend cache for a block device. The purpose is to speed up a |
2 | fairly random but repeated read workload, like the boot of a system. |
3 | |
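A usage sketch (hypothetical device numbers - fcache_dev takes the cache
partition as major:minor, and fcache_prime=1 primes the cache this boot):

    # mount -t ext3 -o fcache_dev=8:17,fcache_prime=1 /dev/sda1 /mnt

On later mounts without fcache_prime, reads that hit primed extents are
served linearly from the cache device.
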
4 | Signed-off-by: Jens Axboe <axboe@suse.de> |
5 | --- |
6 | block/ll_rw_blk.c | 11 |
7 | drivers/block/Kconfig | 6 |
8 | drivers/block/Makefile | 1 |
9 | drivers/block/fcache.c | 1475 ++++++++++++++++++++++++++++++++++++++++++++++++ |
10 | fs/ext3/super.c | 81 ++ |
11 | include/linux/bio.h | 9 |
12 | include/linux/ext3_fs.h | 14 |
13 | 7 files changed, 1587 insertions(+), 10 deletions(-) |
14 | |
15 | Index: linux-ck-dev/block/ll_rw_blk.c |
16 | =================================================================== |
17 | --- linux-ck-dev.orig/block/ll_rw_blk.c 2006-06-18 15:20:10.000000000 +1000 |
18 | +++ linux-ck-dev/block/ll_rw_blk.c 2006-06-18 15:25:27.000000000 +1000 |
19 | @@ -2817,12 +2817,10 @@ static void init_request_from_bio(struct |
20 | */ |
21 | if (bio_rw_ahead(bio) || bio_failfast(bio)) |
22 | req->flags |= REQ_FAILFAST; |
23 | - |
24 | - /* |
25 | - * REQ_BARRIER implies no merging, but lets make it explicit |
26 | - */ |
27 | if (unlikely(bio_barrier(bio))) |
28 | - req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
29 | + req->flags |= REQ_HARDBARRIER; |
30 | + if (!bio_mergeable(bio)) |
31 | + req->flags |= REQ_NOMERGE; |
32 | |
33 | req->errors = 0; |
34 | req->hard_sector = req->sector = bio->bi_sector; |
35 | @@ -2870,7 +2868,7 @@ static int __make_request(request_queue_ |
36 | |
37 | spin_lock_irq(q->queue_lock); |
38 | |
39 | - if (unlikely(barrier) || elv_queue_empty(q)) |
40 | + if (!bio_mergeable(bio) || elv_queue_empty(q)) |
41 | goto get_rq; |
42 | |
43 | el_ret = elv_merge(q, &req, bio); |
44 | @@ -3109,6 +3107,7 @@ void submit_bio(int rw, struct bio *bio) |
45 | |
46 | BIO_BUG_ON(!bio->bi_size); |
47 | BIO_BUG_ON(!bio->bi_io_vec); |
48 | + BIO_BUG_ON(bio->bi_next); |
49 | bio->bi_rw |= rw; |
50 | if (rw & WRITE) |
51 | mod_page_state(pgpgout, count); |
52 | Index: linux-ck-dev/drivers/block/Kconfig |
53 | =================================================================== |
54 | --- linux-ck-dev.orig/drivers/block/Kconfig 2006-06-18 15:20:10.000000000 +1000 |
55 | +++ linux-ck-dev/drivers/block/Kconfig 2006-06-18 15:25:27.000000000 +1000 |
56 | @@ -456,4 +456,10 @@ config ATA_OVER_ETH |
57 | This driver provides Support for ATA over Ethernet block |
58 | devices like the Coraid EtherDrive (R) Storage Blade. |
59 | |
60 | +config BLK_FCACHE |
61 | + bool "Boot frontend cache driver" |
62 | + help |
63 | + This driver puts the data needed for a boot sequentially in a |
64 | + defined place, taking all seeks out of the boot process. |
65 | + |
66 | endmenu |
67 | Index: linux-ck-dev/drivers/block/Makefile |
68 | =================================================================== |
69 | --- linux-ck-dev.orig/drivers/block/Makefile 2006-06-18 15:20:10.000000000 +1000 |
70 | +++ linux-ck-dev/drivers/block/Makefile 2006-06-18 15:25:27.000000000 +1000 |
71 | @@ -5,6 +5,7 @@ |
72 | # Rewritten to use lists instead of if-statements. |
73 | # |
74 | |
75 | +obj-$(CONFIG_BLK_FCACHE) += fcache.o |
76 | obj-$(CONFIG_MAC_FLOPPY) += swim3.o |
77 | obj-$(CONFIG_BLK_DEV_FD) += floppy.o |
78 | obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o |
79 | Index: linux-ck-dev/drivers/block/fcache.c |
80 | =================================================================== |
81 | --- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
82 | +++ linux-ck-dev/drivers/block/fcache.c 2006-06-18 15:25:27.000000000 +1000 |
83 | @@ -0,0 +1,1475 @@ |
84 | +/* |
85 | + * A frontend cache for a block device. The purpose is to speed up a |
86 | + * fairly random but repeated read workload, like the boot of a system. |
87 | + * |
88 | + * When run in priming mode, fcache allocates and writes the blocks read |
89 | + * from the source drive to our extent cache in the order in which they are |
90 | + * accessed. When later run in non-priming mode, data accessed in the same |
91 | + * order will be linearly available in the cache. |
92 | + * |
93 | + * Performance when priming is slower than non-fcache usage would be. If |
94 | + * the fcache is located on another disk, the hit should be small. If |
95 | + * the fcache is located on the same disk (another partition), it runs |
96 | + * at about half the speed. Non-priming performance should be fairly |
97 | + * similar on same/other disk. |
98 | + * |
99 | + * On-disk format is as follows: |
100 | + * Block0: header |
101 | + * Block1..X extent maps |
102 | + * BlockX+1..Y extent data |
103 | + * |
104 | + * Copyright (C) 2006 Jens Axboe <axboe@suse.de> |
105 | + * |
106 | + */ |
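+/*
+ * Layout sketch (illustrative numbers, not mandated by the format): with
+ * 4 KiB blocks, block 0 holds struct fcache_header, the extent maps
+ * follow with roughly PAGE_SIZE / sizeof(struct fcache_extent) entries
+ * per block, and extent data starts at block header->extent_offset,
+ * handed out sequentially by fcache_add_extent() in first-access order.
+ */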
107 | +#include <linux/config.h> |
108 | +#include <linux/module.h> |
109 | +#include <linux/moduleparam.h> |
110 | +#include <linux/sched.h> |
111 | +#include <linux/blkdev.h> |
112 | +#include <linux/prio_tree.h> |
113 | +#include <linux/buffer_head.h> |
114 | +#include <linux/slab.h> |
115 | + |
116 | +#define FCACHE_MAGIC 0x61786663 |
117 | +#define FCACHE_VERSION 0x02 |
118 | + |
119 | +#define FCACHE_HEADER_BLOCK 0 |
120 | +#define FCACHE_EXTENT_BLOCK 1 |
121 | + |
122 | +#undef FCACHE_PAGES_PROTECTED |
123 | + |
124 | +struct fcache_dev { |
125 | + struct block_device *bdev; |
126 | + struct block_device *fs_bdev; |
127 | + make_request_fn *mfn; |
128 | + struct prio_tree_root prio_root; |
129 | + unsigned long next_cache_block; |
130 | + unsigned long nr_extents; |
131 | + unsigned long max_extents; |
132 | + unsigned int old_bs; |
133 | + spinlock_t lock; |
134 | + |
135 | + sector_t cache_start_sector; |
136 | + unsigned long cache_blocks; |
137 | + sector_t fs_start_sector; |
138 | + sector_t fs_sectors; |
139 | + |
140 | + unsigned long flags; |
141 | + int priming; |
142 | + int serial; |
143 | + int chop_ios; |
144 | + |
145 | + struct list_head list; |
146 | + struct work_struct work; |
147 | + |
148 | + /* |
149 | + * stats |
150 | + */ |
151 | + unsigned int ios[2]; |
152 | + unsigned int hits; |
153 | + unsigned int misses; |
154 | + unsigned int overwrites; |
155 | +}; |
156 | + |
157 | +enum { |
158 | + FDEV_F_DOWN = 0, |
159 | +}; |
160 | + |
161 | +static struct fcache_dev fcache_dev; |
162 | + |
163 | +static int disable; |
164 | +module_param(disable, int, 0444); |
165 | + |
166 | +struct fcache_endio_data { |
167 | + struct fcache_dev *fdev; |
168 | + sector_t fs_sector; |
169 | + unsigned int fs_size; |
170 | + sector_t cache_sector; |
171 | + atomic_t completions; |
172 | + struct bio *bio; |
173 | + int io_error; |
174 | + struct list_head list; |
175 | +}; |
176 | + |
177 | +/* |
178 | + * Maps a file system block to the fcache |
179 | + */ |
180 | +struct fcache_extent { |
181 | + sector_t fs_sector; /* real device offset */ |
182 | + unsigned int fs_size; /* extent length */ |
183 | + sector_t cache_sector; /* cache device offset */ |
184 | + |
185 | + struct prio_tree_node prio_node; |
186 | +}; |
187 | + |
188 | +/* |
189 | + * Header on fcache device - will take up the first page of data, so |
190 | + * plenty of room to go around. |
191 | + */ |
192 | +struct fcache_header { |
193 | + u32 magic; /* fcache magic */ |
194 | + u32 version; /* fcache version */ |
195 | + u32 nr_extents; /* nr of extents in cache */ |
196 | + u32 max_extents; /* max nr of extents available */ |
197 | + u32 serial; /* fs and cache serial */ |
198 | + u32 extent_offset; /* where extents start */ |
199 | + u64 fs_start_sector; /* where fs starts */ |
200 | + u64 fs_sectors; /* how big fs is */ |
201 | + char fs_dev[BDEVNAME_SIZE]; /* fs partition */ |
202 | + u64 cache_blocks; /* number of blocks in cache */ |
203 | + u64 cache_blocks_used; /* used blocks in cache */ |
204 | + u16 sector_t_size; /* user space helper */ |
205 | + u16 extent_size; /* user space helper */ |
206 | +}; |
207 | + |
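+/* converts between page-sized cache block indices and 512-byte sectors */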
208 | +#define BLOCK_SHIFT (PAGE_SHIFT - 9) |
209 | + |
210 | +static struct kmem_cache *fcache_slab; |
211 | +static struct kmem_cache *fcache_fed_slab; |
212 | +static mempool_t *fed_pool; |
213 | +static struct workqueue_struct *fcache_workqueue; |
214 | + |
215 | +static int fcache_rw_page_endio(struct bio *bio, unsigned int bytes, int err) |
216 | +{ |
217 | + if (bio->bi_size) |
218 | + return 1; |
219 | + |
220 | + complete(bio->bi_private); |
221 | + return 0; |
222 | +} |
223 | + |
224 | +/* |
225 | + * Writes out a page of data and waits for it to complete. |
226 | + */ |
227 | +static int fcache_rw_page(struct fcache_dev *fdev, sector_t index, |
228 | + struct page *page, int rw) |
229 | +{ |
230 | + DECLARE_COMPLETION(wait); |
231 | + struct bio *bio; |
232 | + int ret = 0; |
233 | + |
234 | + bio = bio_alloc(GFP_KERNEL, 1); |
235 | + |
236 | + bio->bi_sector = index << BLOCK_SHIFT; |
237 | + bio->bi_bdev = fdev->bdev; |
238 | + bio->bi_rw |= (1 << BIO_RW_SYNC); |
239 | + bio->bi_end_io = fcache_rw_page_endio; |
240 | + bio->bi_private = &wait; |
241 | + |
242 | + bio_add_page(bio, page, PAGE_SIZE, 0); |
243 | + submit_bio(rw, bio); |
244 | + |
245 | + wait_for_completion(&wait); |
246 | + |
247 | + if (!bio_flagged(bio, BIO_UPTODATE)) |
248 | + ret = -EIO; |
249 | + |
250 | + bio_put(bio); |
251 | + return ret; |
252 | +} |
253 | + |
254 | +static inline void fcache_fill_header(struct fcache_dev *fdev, |
255 | + struct fcache_header *header, |
256 | + unsigned int nr_extents) |
257 | +{ |
258 | + /* |
259 | + * See how many pages we need for extent headers, then we know where |
260 | + * to start putting data. Assume worst case of 1 page per extent, and |
261 | + * reserve the first page for the header. |
262 | + */ |
263 | + |
264 | + header->magic = FCACHE_MAGIC; |
265 | + header->version = FCACHE_VERSION; |
266 | + header->nr_extents = nr_extents; |
267 | + header->max_extents = ((fdev->cache_blocks - 1) * PAGE_SIZE) / (PAGE_SIZE - sizeof(struct fcache_extent)); |
268 | + header->serial = fdev->serial; |
269 | + |
270 | + header->extent_offset = 1 + (header->max_extents * sizeof(struct fcache_extent) / PAGE_SIZE); |
271 | + |
272 | + header->fs_start_sector = fdev->fs_start_sector; |
273 | + header->fs_sectors = fdev->fs_sectors; |
274 | + bdevname(fdev->fs_bdev, header->fs_dev); |
275 | + header->cache_blocks = fdev->cache_blocks; |
276 | + header->cache_blocks_used = fdev->next_cache_block; |
277 | + header->sector_t_size = sizeof(sector_t); |
278 | + header->extent_size = sizeof(struct fcache_extent); |
279 | +} |
280 | + |
281 | +static int fcache_write_new_header(struct fcache_dev *fdev) |
282 | +{ |
283 | + struct fcache_header *header; |
284 | + struct page *page; |
285 | + int ret; |
286 | + |
287 | + page = alloc_page(GFP_HIGHUSER); |
288 | + if (unlikely(!page)) |
289 | + return -ENOMEM; |
290 | + |
291 | + header = kmap_atomic(page, KM_USER0); |
292 | + clear_page(header); |
293 | + fcache_fill_header(fdev, header, 0); |
294 | + fdev->next_cache_block = header->extent_offset; |
295 | + fdev->max_extents = header->max_extents; |
296 | + kunmap_atomic(header, KM_USER0); |
297 | + |
298 | + printk("fcache: new header: first block %lu, max %lu\n", |
299 | + fdev->next_cache_block, fdev->max_extents); |
300 | + ret = fcache_rw_page(fdev, FCACHE_HEADER_BLOCK, page, WRITE); |
301 | + __free_page(page); |
302 | + return ret; |
303 | +} |
304 | + |
305 | +static void fcache_free_prio_tree(struct fcache_dev *fdev) |
306 | +{ |
307 | + struct fcache_extent *fe; |
308 | + struct prio_tree_iter iter; |
309 | + struct prio_tree_node *node; |
310 | + |
311 | + /* |
312 | + * Now prune and free tree, wish there was a better way... |
313 | + */ |
314 | + do { |
315 | + prio_tree_iter_init(&iter, &fdev->prio_root, 0, ULONG_MAX); |
316 | + |
317 | + node = prio_tree_next(&iter); |
318 | + if (!node) |
319 | + break; |
320 | + |
321 | + fe = prio_tree_entry(node, struct fcache_extent, prio_node); |
322 | + prio_tree_remove(&fdev->prio_root, node); |
323 | + kmem_cache_free(fcache_slab, fe); |
324 | + } while (1); |
325 | +} |
326 | + |
327 | +/* |
328 | + * First clear the header, write extents, then write real header. |
329 | + */ |
330 | +static int fcache_write_extents(struct fcache_dev *fdev) |
331 | +{ |
332 | + struct fcache_header *header; |
333 | + sector_t index, sectors; |
334 | + unsigned int nr_extents, this_extents; |
335 | + struct fcache_extent *fe; |
336 | + struct prio_tree_iter iter; |
337 | + struct prio_tree_node *node; |
338 | + struct page *page; |
339 | + void *p; |
340 | + int ret; |
341 | + |
342 | + page = alloc_page(GFP_KERNEL); |
343 | + if (unlikely(!page)) |
344 | + return -ENOMEM; |
345 | + |
346 | + header = page_address(page); |
347 | + clear_page(header); |
348 | + fcache_fill_header(fdev, header, 0); |
349 | + ret = fcache_rw_page(fdev, FCACHE_HEADER_BLOCK, page, WRITE); |
350 | + if (ret) |
351 | + goto err; |
352 | + |
353 | + /* |
354 | + * Now write the extents in page size chunks. |
355 | + */ |
356 | + p = page_address(page); |
357 | + clear_page(p); |
358 | + index = FCACHE_EXTENT_BLOCK; |
359 | + sectors = 0; |
360 | + this_extents = nr_extents = 0; |
361 | + |
362 | + prio_tree_iter_init(&iter, &fdev->prio_root, 0, ULONG_MAX); |
363 | + |
364 | + do { |
365 | + node = prio_tree_next(&iter); |
366 | + if (!node) |
367 | + break; |
368 | + |
369 | + fe = prio_tree_entry(node, struct fcache_extent, prio_node); |
370 | + nr_extents++; |
371 | + this_extents++; |
372 | + sectors += fe->fs_size >> 9; |
373 | + memcpy(p, fe, sizeof(*fe)); |
374 | + p += sizeof(*fe); |
375 | + if ((this_extents + 1) * sizeof(*fe) > PAGE_SIZE) { |
376 | + ret = fcache_rw_page(fdev, index, page, WRITE); |
377 | + if (ret) |
378 | + break; |
379 | + |
380 | + this_extents = 0; |
381 | + index++; |
382 | + p = page_address(page); |
383 | + } |
384 | + } while (1); |
385 | + |
386 | + if (this_extents) |
387 | + ret = fcache_rw_page(fdev, index, page, WRITE); |
388 | + |
389 | + fdev->nr_extents = nr_extents; |
390 | + printk("fcache: wrote %d extents, holding %llu sectors of data\n", |
391 | + nr_extents, (unsigned long long) sectors); |
392 | +err: |
393 | + __free_page(page); |
394 | + return ret; |
395 | +} |
396 | + |
397 | +static int fcache_write_header(struct fcache_dev *fdev) |
398 | +{ |
399 | + struct page *page; |
400 | + int ret; |
401 | + |
402 | + page = alloc_page(GFP_KERNEL); |
403 | + if (unlikely(!page)) |
404 | + return -ENOMEM; |
405 | + |
406 | + ret = fcache_rw_page(fdev, FCACHE_HEADER_BLOCK, page, READ); |
407 | + if (!ret) { |
408 | + struct fcache_header *header = page_address(page); |
409 | + |
410 | + fcache_fill_header(fdev, header, fdev->nr_extents); |
411 | + ret = fcache_rw_page(fdev, FCACHE_HEADER_BLOCK, page, WRITE); |
412 | + printk("fcache: wrote header (extents=%lu, serial=%d)\n", |
413 | + fdev->nr_extents, fdev->serial); |
414 | + } |
415 | + |
416 | + __free_page(page); |
417 | + return ret; |
418 | +} |
419 | + |
420 | +static void fcache_tree_link(struct fcache_dev *fdev, struct fcache_extent *fe) |
421 | +{ |
422 | + struct prio_tree_node *node = &fe->prio_node; |
423 | + unsigned long flags; |
424 | + |
425 | + INIT_PRIO_TREE_NODE(node); |
426 | + node->start = fe->fs_sector; |
427 | + node->last = fe->fs_sector + (fe->fs_size >> 9) - 1; |
428 | + |
429 | + spin_lock_irqsave(&fdev->lock, flags); |
430 | + prio_tree_insert(&fdev->prio_root, node); |
431 | + spin_unlock_irqrestore(&fdev->lock, flags); |
432 | +} |
433 | + |
434 | +#define MAX_FE 16 |
435 | + |
436 | +/* |
437 | + * Lookup the range of a given request in the prio tree. Used for both |
438 | + * looking up a range covering a read operation to be served from cache, |
439 | + * and to lookup potential conflicts from a new write with an existing |
440 | + * extent. |
441 | + */ |
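+/*
+ * Example (hypothetical sectors): a 4KiB read at sector 1000 queries the
+ * tree for the range [1000, 1007]; every extent overlapping that range
+ * is returned, up to MAX_FE entries per call.
+ */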
442 | +static int fcache_lookup_extent(struct fcache_dev *fdev, sector_t offset, |
443 | + unsigned int bytes, struct fcache_extent **map) |
444 | +{ |
445 | + sector_t end_sector = offset + (bytes >> 9) - 1; |
446 | + struct prio_tree_node *node; |
447 | + struct prio_tree_iter iter; |
448 | + int i = 0; |
449 | + |
450 | + prio_tree_iter_init(&iter, &fdev->prio_root, offset, end_sector); |
451 | + |
452 | + /* |
453 | + * We only need to lock if we are priming. The prio tree does |
454 | + * not change when in normal mode. |
455 | + */ |
456 | + if (fdev->priming) |
457 | + spin_lock_irq(&fdev->lock); |
458 | + |
459 | + do { |
460 | + node = prio_tree_next(&iter); |
461 | + if (!node) |
462 | + break; |
463 | + |
464 | + map[i] = prio_tree_entry(node, struct fcache_extent, prio_node); |
465 | + } while (++i < MAX_FE); |
466 | + |
467 | + if (fdev->priming) |
468 | + spin_unlock_irq(&fdev->lock); |
469 | + |
470 | + return i; |
471 | +} |
472 | + |
473 | +/* |
474 | + * Our data write is done, now insert the fcache extents into the prio tree. |
475 | + */ |
476 | +static int fcache_instantiate_extent(struct fcache_dev *fdev, |
477 | + struct fcache_endio_data *fed) |
478 | +{ |
479 | + struct fcache_extent *fe; |
480 | + |
481 | + fe = kmem_cache_alloc(fcache_slab, GFP_ATOMIC); |
482 | + if (fe) { |
483 | + fe->fs_sector = fed->fs_sector; |
484 | + fe->fs_size = fed->fs_size; |
485 | + fe->cache_sector = fed->cache_sector; |
486 | + |
487 | + fcache_tree_link(fdev, fe); |
488 | + return 0; |
489 | + } |
490 | + |
491 | + return -ENOMEM; |
492 | +} |
493 | + |
494 | +/* |
495 | + * Hang on to the bio and its pages - ideally we would want to ensure |
496 | + * that the page data doesn't change between calling this function and |
497 | + * fcache_put_bio_pages() as well... |
498 | + */ |
499 | +static void fcache_get_bio_pages(struct fcache_dev *fdev, struct bio *bio) |
500 | +{ |
501 | + /* |
502 | + * Currently stubbed out, as we cannot end the bio read before |
503 | + * the write completes without also making sure that the pages |
504 | + * don't get reused for something else in the mean time. |
505 | + */ |
506 | +#ifdef FCACHE_PAGES_PROTECTED |
507 | + struct bio_vec *bvec; |
508 | + int i; |
509 | + |
510 | + bio_get(bio); |
511 | + |
512 | + __bio_for_each_segment(bvec, bio, i, 0) |
513 | + get_page(bvec->bv_page); |
514 | +#endif |
515 | +} |
516 | + |
517 | +static void fcache_put_bio_pages(struct fcache_dev *fdev, struct bio *bio) |
518 | +{ |
519 | +#ifdef FCACHE_PAGES_PROTECTED |
520 | + struct bio_vec *bvec; |
521 | + int i; |
522 | + |
523 | + __bio_for_each_segment(bvec, bio, i, 0) |
524 | + put_page(bvec->bv_page); |
525 | + |
526 | + bio_put(bio); |
527 | +#endif |
528 | +} |
529 | + |
530 | +static void fcache_chop_write_done(struct fcache_endio_data *fed) |
531 | +{ |
532 | + /* |
533 | + * Last io completes. |
534 | + */ |
535 | + if (atomic_dec_and_test(&fed->completions)) { |
536 | + struct fcache_dev *fdev = fed->fdev; |
537 | + struct bio *bio = fed->bio; |
538 | + |
539 | + /* |
540 | + * Release our reference to the original bio and |
541 | + * its pages. |
542 | + */ |
543 | + fcache_put_bio_pages(fdev, bio); |
544 | + |
545 | + /* |
546 | + * End the read! |
547 | + */ |
548 | + bio_endio(bio, bio->bi_size, 0); |
549 | + |
550 | + /* |
551 | + * All done, now add extent to our list if io completed ok. |
552 | + */ |
553 | + if (!fed->io_error) |
554 | + fcache_instantiate_extent(fdev, fed); |
555 | + |
556 | + mempool_free(fed, fed_pool); |
557 | + } |
558 | +} |
559 | + |
560 | +/* |
561 | + * Our data write to the cache completes, we can free our clone and |
562 | + * instantiate the extent block. |
563 | + */ |
564 | +static int fcache_extent_write_endio(struct bio *bio, unsigned int bytes, |
565 | + int err) |
566 | +{ |
567 | + struct fcache_endio_data *fed; |
568 | + |
569 | + if (bio->bi_size) |
570 | + return 1; |
571 | + |
572 | + fed = bio->bi_private; |
573 | + |
574 | + if (!bio_flagged(bio, BIO_UPTODATE)) |
575 | + fed->io_error = -EIO; |
576 | + |
577 | + bio_put(bio); |
578 | + fcache_chop_write_done(fed); |
579 | + return 0; |
580 | +} |
581 | + |
582 | +static void fcache_chop_read_done(struct fcache_endio_data *fed) |
583 | +{ |
584 | + if (atomic_dec_and_test(&fed->completions)) { |
585 | + struct bio *bio = fed->bio; |
586 | + |
587 | + bio_endio(bio, bio->bi_size, fed->io_error); |
588 | + mempool_free(fed, fed_pool); |
589 | + } |
590 | +} |
591 | + |
592 | +static int fcache_chop_read_endio(struct bio *bio, unsigned int bytes, int err) |
593 | +{ |
594 | + struct fcache_endio_data *fed; |
595 | + |
596 | + if (bio->bi_size) |
597 | + return 1; |
598 | + |
599 | + fed = bio->bi_private; |
600 | + |
601 | + if (!bio_flagged(bio, BIO_UPTODATE)) |
602 | + fed->io_error = -EIO; |
603 | + |
604 | + bio_put(bio); |
605 | + fcache_chop_read_done(fed); |
606 | + return 0; |
607 | +} |
608 | + |
609 | +typedef void (chopper_done_t) (struct fcache_endio_data *); |
610 | + |
611 | +/* |
612 | + * This is our io chopper - it hacks a bio into smaller pieces, suitable |
613 | + * for the target device. Caller supplies suitable end_io and done functions. |
614 | + */ |
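+/*
+ * Example of why chopping is needed: the cache queue may allow fewer
+ * segments or a smaller max transfer than the fs queue. When
+ * bio_add_page() refuses a page below, we submit what we have and start
+ * a fresh bio at the next cache sector.
+ */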
615 | +static void fcache_io_chopper(struct fcache_dev *fdev, |
616 | + struct fcache_endio_data *fed, |
617 | + bio_end_io_t *endio, chopper_done_t *done, int rw) |
618 | +{ |
619 | + struct bio *bio = NULL; |
620 | + struct bio_vec *bv; |
621 | + unsigned int total_bytes; |
622 | + sector_t sector; |
623 | + int i, vecs; |
624 | + |
625 | + /* |
626 | + * Make sure 'fed' doesn't disappear while we are still issuing |
627 | + * ios; the artificial reference is dropped at the end. |
628 | + */ |
629 | + atomic_set(&fed->completions, 1); |
630 | + |
631 | + sector = fed->cache_sector; |
632 | + total_bytes = fed->fs_size; |
633 | + vecs = fed->bio->bi_vcnt; |
634 | + bio_for_each_segment(bv, fed->bio, i) { |
635 | + unsigned int len; |
636 | + |
637 | + if (!total_bytes) |
638 | + break; |
639 | + |
640 | + len = bv->bv_len; |
641 | + if (len > total_bytes) |
642 | + len = total_bytes; |
643 | + |
644 | + do { |
645 | + unsigned int l; |
646 | + |
647 | + if (!bio) { |
648 | + bio = bio_alloc(GFP_NOFS, vecs); |
649 | + |
650 | + bio->bi_sector = sector; |
651 | + bio->bi_bdev = fdev->bdev; |
652 | + bio->bi_end_io = endio; |
653 | + bio->bi_private = fed; |
654 | + } |
655 | + |
656 | + /* |
657 | + * If successful, break out of this loop and move on. |
658 | + */ |
659 | + l = bio_add_page(bio, bv->bv_page, len, bv->bv_offset); |
660 | + if (l == len) |
661 | + break; |
662 | + |
663 | + BUG_ON(!bio->bi_size); |
664 | + |
665 | + /* |
666 | + * We could not add this page, submit what we have |
667 | + * and alloc a new bio. |
668 | + */ |
669 | + atomic_inc(&fed->completions); |
670 | + submit_bio(rw, bio); |
671 | + bio = NULL; |
672 | + } while (1); |
673 | + |
674 | + total_bytes -= len; |
675 | + sector += len >> 9; |
676 | + vecs--; |
677 | + } |
678 | + |
679 | + if (bio) { |
680 | + atomic_inc(&fed->completions); |
681 | + submit_bio(rw, bio); |
682 | + } |
683 | + |
684 | + /* |
685 | + * Drop our reference to fed. |
686 | + */ |
687 | + done(fed); |
688 | +} |
689 | + |
690 | +/* |
691 | + * The cache device's queue limits are no stricter than those of the fs |
692 | + * device - in that case, we can resubmit the bio to the cache device directly. |
693 | + */ |
694 | +static void fcache_direct_cache_write(struct fcache_dev *fdev, |
695 | + struct fcache_endio_data *fed) |
696 | +{ |
697 | + struct bio *bio = bio_clone(fed->bio, GFP_NOFS); |
698 | + |
699 | + bio->bi_sector = fed->cache_sector; |
700 | + bio->bi_bdev = fdev->bdev; |
701 | + bio->bi_end_io = fcache_extent_write_endio; |
702 | + bio->bi_private = fed; |
703 | + |
704 | + atomic_set(&fed->completions, 1); |
705 | + submit_bio(WRITE, bio); |
706 | +} |
707 | + |
708 | +/* |
709 | + * cache device has more conservative restrictions than the fs device. |
710 | + * The safest approach is to split up the bio and let bio_add_page() |
711 | + * decide when it's time to submit the pieces. |
712 | + */ |
713 | +static void fcache_submit_cache_write(struct fcache_dev *fdev, |
714 | + struct fcache_endio_data *fed) |
715 | +{ |
716 | + if (!fdev->chop_ios) |
717 | + fcache_direct_cache_write(fdev, fed); |
718 | + else |
719 | + fcache_io_chopper(fdev, fed, fcache_extent_write_endio, |
720 | + fcache_chop_write_done, WRITE); |
721 | +} |
722 | + |
723 | +/* |
724 | + * We punt work to fcache_work() whenever we need to do work that blocks. The |
725 | + * only thing that this thread handles is submitting the extent write |
726 | + * when the real read has completed. We used to do the extent instantiation |
727 | + * here as well, but fcache_extent_write_endio handles that now. |
728 | + */ |
729 | +static void fcache_work(void *data) |
730 | +{ |
731 | + struct fcache_dev *fdev = data; |
732 | + |
733 | + do { |
734 | + struct fcache_endio_data *fed = NULL; |
735 | + struct bio *bio; |
736 | + |
737 | + spin_lock_irq(&fdev->lock); |
738 | + if (!list_empty(&fdev->list)) { |
739 | + fed = list_entry(fdev->list.next, struct fcache_endio_data, list); |
740 | + list_del_init(&fed->list); |
741 | + } |
742 | + spin_unlock_irq(&fdev->lock); |
743 | + |
744 | + if (!fed) |
745 | + break; |
746 | + |
747 | + bio = fed->bio; |
748 | + |
749 | + if (fed->io_error) { |
750 | + printk(KERN_ERR "fcache: read error from device\n"); |
751 | + bio_endio(bio, bio->bi_size, fed->io_error); |
752 | + continue; |
753 | + } |
754 | + |
755 | + /* |
756 | + * Get a ref on the original bio and pages, then |
757 | + * we should be able to signal completion of the READ |
758 | + * without waiting for the write to finish first. |
759 | + */ |
760 | + fcache_get_bio_pages(fdev, bio); |
761 | + |
762 | + /* |
763 | + * Submit the read data as cache writes. |
764 | + */ |
765 | + fcache_submit_cache_write(fdev, fed); |
766 | + |
767 | + /* |
768 | + * If fcache_get_bio_pages() could protect the pages from |
769 | + * being changed, we could end the io here instead of in |
770 | + * fcache_chop_write_done(). |
771 | + */ |
772 | + } while (1); |
773 | +} |
774 | + |
775 | +/* |
776 | + * Align bio to start at extent and stop sooner if extent is short. Must |
777 | + * be called cautiously - it's only allowed to modify the bio if this is |
778 | + * a clone and a write request, reads must be fully aligned and only |
779 | + * possibly require a starting offset modification. |
780 | + */ |
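+/*
+ * Worked example (hypothetical numbers): a cloned write covering fs
+ * sectors 100-115 against an extent mapping fs sectors 96-127 to cache
+ * sector 5000 starts at cache sector 5004 (5000 + (100 - 96)); a clone
+ * running past the extent end has its trailing bvecs trimmed below.
+ */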
781 | +static void fcache_bio_align(struct bio *bio, struct fcache_extent *fe) |
782 | +{ |
783 | + struct bio_vec *bvec; |
784 | + sector_t start, end; |
785 | + sector_t org_start, org_end; |
786 | + unsigned int org_size, org_idx; |
787 | + int i; |
788 | + |
789 | + start = bio->bi_sector; |
790 | + bio->bi_sector = fe->cache_sector; |
791 | + |
792 | + /* |
793 | + * Nothing to do, perfectly aligned. |
794 | + */ |
795 | + if (start == fe->fs_sector && bio->bi_size == fe->fs_size) |
796 | + return; |
797 | + |
798 | + org_start = bio->bi_sector; |
799 | + org_end = bio->bi_sector + (bio->bi_size >> 9); |
800 | + org_size = bio->bi_size; |
801 | + org_idx = bio->bi_idx; |
802 | + |
803 | + /* |
804 | + * Adjust beginning. |
805 | + */ |
806 | + if (start > fe->fs_sector) |
807 | + bio->bi_sector += (start - fe->fs_sector); |
808 | + else if (start < fe->fs_sector) { |
809 | + sector_t diff = fe->fs_sector - start; |
810 | + int idx = 0; |
811 | + |
812 | + BUG_ON(!(bio->bi_flags & (1 << BIO_CLONED))); |
813 | + BUG_ON(bio_data_dir(bio) != WRITE); |
814 | + |
815 | + /* |
816 | + * Adjust where bio starts |
817 | + */ |
818 | + __bio_for_each_segment(bvec, bio, i, 0) { |
819 | + unsigned int bsec = bvec->bv_len >> 9; |
820 | + unsigned int this_diff = bsec; |
821 | + |
822 | + if (!diff) |
823 | + break; |
824 | + if (this_diff > diff) |
825 | + this_diff = diff; |
826 | + |
827 | + bio->bi_sector += this_diff; |
828 | + bio->bi_size -= (this_diff << 9); |
829 | + |
830 | + /* |
831 | + * Bigger than this chunk, skip ahead. |
832 | + */ |
833 | + if (this_diff == bsec) { |
834 | + idx++; |
835 | + diff -= this_diff; |
836 | + continue; |
837 | + } |
838 | + |
839 | + /* |
840 | + * Adjust this bvec |
841 | + */ |
842 | + bvec->bv_offset += (this_diff << 9); |
843 | + bvec->bv_len -= (this_diff << 9); |
844 | + break; |
845 | + } |
846 | + bio->bi_idx += idx; |
847 | + } |
848 | + |
849 | + /* |
850 | + * Goes beyond the end, shrink size. |
851 | + */ |
852 | + end = bio->bi_sector + (bio->bi_size >> 9); |
853 | + if (end > fe->cache_sector + (fe->fs_size >> 9)) { |
854 | + sector_t diff = end - (fe->cache_sector + (fe->fs_size >> 9)); |
855 | + int vecs = 0; |
856 | + |
857 | + BUG_ON(!(bio->bi_flags & (1 << BIO_CLONED))); |
858 | + BUG_ON(bio_data_dir(bio) != WRITE); |
859 | + |
860 | + /* |
861 | + * This is __bio_for_each_segment_reverse(). |
862 | + */ |
863 | + for (i = bio->bi_vcnt - 1; i >= bio->bi_idx; i--) { |
864 | + struct bio_vec *bvec = &bio->bi_io_vec[i]; |
865 | + unsigned int bsec = bvec->bv_len >> 9; |
866 | + unsigned int this_diff = bsec; |
867 | + |
868 | + if (!diff) |
869 | + break; |
870 | + if (this_diff > diff) |
871 | + this_diff = diff; |
872 | + |
873 | + bio->bi_size -= (this_diff << 9); |
874 | + |
875 | + /* |
876 | + * Bigger than this chunk, skip ahead. |
877 | + */ |
878 | + if (this_diff == bsec) { |
879 | + vecs++; |
880 | + diff -= this_diff; |
881 | + continue; |
882 | + } |
883 | + |
884 | + /* |
885 | + * Adjust this bvec |
886 | + */ |
887 | + bvec->bv_len -= (this_diff << 9); |
888 | + break; |
889 | + } |
890 | + bio->bi_vcnt -= vecs; |
891 | + } |
892 | + |
893 | + BUG_ON(bio->bi_sector < fe->cache_sector); |
894 | + BUG_ON(bio->bi_sector + (bio->bi_size >> 9) > fe->cache_sector + (fe->fs_size >> 9)); |
895 | + |
896 | + /* |
897 | + * Invalidate the segment counts, we changed the bio layout. |
898 | + */ |
899 | + bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
900 | + bio->bi_flags |= (1 << BIO_NOMERGE); |
901 | +} |
902 | + |
903 | +static int fcache_overwrite_endio(struct bio *bio, unsigned int bytes, int err) |
904 | +{ |
905 | + if (bio->bi_size) |
906 | + return 1; |
907 | + |
908 | + if (!bio_flagged(bio, BIO_UPTODATE)) { |
909 | + struct fcache_dev *fdev = bio->bi_private; |
910 | + |
911 | + printk(KERN_ERR "fcache: overwrite error, cache off\n"); |
912 | + set_bit(FDEV_F_DOWN, &fdev->flags); |
913 | + } |
914 | + |
915 | + bio_put(bio); |
916 | + return 0; |
917 | +} |
918 | + |
919 | +/* |
920 | + * Schedule overwrite of some existing block(s). |
921 | + */ |
922 | +static int fcache_overwrite_extent(struct fcache_dev *fdev, |
923 | + struct fcache_extent *fe, struct bio *bio) |
924 | +{ |
925 | + struct bio *clone = bio_clone(bio, GFP_NOFS); |
926 | + |
927 | + clone->bi_bdev = fdev->bdev; |
928 | + clone->bi_end_io = fcache_overwrite_endio; |
929 | + clone->bi_private = fdev; |
930 | + fcache_bio_align(clone, fe); |
931 | + submit_bio(WRITE, clone); |
932 | + return 0; |
933 | +} |
934 | + |
935 | +/* |
936 | + * Our real data read is complete. Kick our process context handler so it |
937 | + * can submit the write to our cache. |
938 | + */ |
939 | +static int fcache_extent_endio(struct bio *bio, unsigned int bytes, int err) |
940 | +{ |
941 | + struct fcache_dev *fdev; |
942 | + struct fcache_endio_data *fed; |
943 | + unsigned long flags; |
944 | + |
945 | + if (bio->bi_size) |
946 | + return 1; |
947 | + |
948 | + fed = bio->bi_private; |
949 | + |
950 | + if (!bio_flagged(bio, BIO_UPTODATE)) |
951 | + fed->io_error = -EIO; |
952 | + |
953 | + bio_put(bio); |
954 | + |
955 | + fdev = fed->fdev; |
956 | + spin_lock_irqsave(&fdev->lock, flags); |
957 | + list_add_tail(&fed->list, &fdev->list); |
958 | + spin_unlock_irqrestore(&fdev->lock, flags); |
959 | + queue_work(fcache_workqueue, &fdev->work); |
960 | + return 0; |
961 | +} |
962 | + |
963 | +/* |
964 | + * This initiates adding an extent to our list. We do this by cloning the |
965 | + * original bio and submitting that to the real device and when that completes |
966 | + * we write that out to the cache device and instantiate the extent. |
967 | + */ |
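+/*
+ * Priming flow, in order: fcache_add_extent() reserves cache blocks and
+ * clones the read; fcache_extent_endio() queues the completed read for
+ * fcache_work(); fcache_submit_cache_write() writes the data to the
+ * cache; fcache_extent_write_endio()/fcache_chop_write_done() end the
+ * original read and instantiate the extent in the prio tree.
+ */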
968 | +static int fcache_add_extent(struct fcache_dev *fdev, struct bio *bio) |
969 | +{ |
970 | + struct fcache_endio_data *fed; |
971 | + struct bio *clone; |
972 | + |
973 | + fed = mempool_alloc(fed_pool, GFP_NOIO); |
974 | + |
975 | + fed->fdev = fdev; |
976 | + fed->fs_sector = bio->bi_sector; |
977 | + fed->fs_size = bio->bi_size; |
978 | + fed->cache_sector = -1; |
979 | + fed->bio = bio; |
980 | + fed->io_error = 0; |
981 | + INIT_LIST_HEAD(&fed->list); |
982 | + |
983 | + /* |
984 | + * Allocate/assign an extent block for this range |
985 | + */ |
986 | + spin_lock_irq(&fdev->lock); |
987 | + if (fdev->nr_extents < fdev->max_extents) { |
988 | + unsigned long nr = (bio->bi_size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
989 | + |
990 | + if (fdev->next_cache_block + nr <= fdev->cache_blocks) { |
991 | + fdev->nr_extents++; |
992 | + fed->cache_sector = fdev->next_cache_block << BLOCK_SHIFT; |
993 | + fdev->next_cache_block += nr; |
994 | + } |
995 | + } |
996 | + spin_unlock_irq(&fdev->lock); |
997 | + |
998 | + /* |
999 | + * Ran out of room |
1000 | + */ |
1001 | + if (fed->cache_sector == -1) { |
1002 | + printk(KERN_ERR "fcache: ran out of space, priming now off\n"); |
1003 | + fdev->priming = 0; |
1004 | + mempool_free(fed, fed_pool); |
1005 | + return -ENOENT; |
1006 | + } |
1007 | + |
1008 | + clone = bio_clone(bio, GFP_NOFS); |
1009 | + clone->bi_private = fed; |
1010 | + clone->bi_end_io = fcache_extent_endio; |
1011 | + clone->bi_rw |= (1 << BIO_RW_SYNC); |
1012 | + |
1013 | + generic_make_request(clone); |
1014 | + return 0; |
1015 | +} |
1016 | + |
1017 | +static int fcache_parse_extents(struct fcache_dev *fdev, void *addr, |
1018 | + unsigned int max_extents) |
1019 | +{ |
1020 | + int nr_extents = PAGE_SIZE / sizeof(struct fcache_extent); |
1021 | + int extents_read; |
1022 | + |
1023 | + if (nr_extents > max_extents) |
1024 | + nr_extents = max_extents; |
1025 | + |
1026 | + extents_read = 0; |
1027 | + while (nr_extents) { |
1028 | + struct fcache_extent *fe, *__fe = addr; |
1029 | + |
1030 | + fe = kmem_cache_alloc(fcache_slab, GFP_KERNEL); |
1031 | + if (unlikely(!fe)) |
1032 | + return -ENOMEM; |
1033 | + |
1034 | + memset(fe, 0, sizeof(*fe)); |
1035 | + fe->fs_sector = __fe->fs_sector; |
1036 | + fe->fs_size = __fe->fs_size; |
1037 | + fe->cache_sector = __fe->cache_sector; |
1038 | + |
1039 | + fcache_tree_link(fdev, fe); |
1040 | + |
1041 | + nr_extents--; |
1042 | + extents_read++; |
1043 | + addr += sizeof(*fe); |
1044 | + } |
1045 | + |
1046 | + return extents_read; |
1047 | +} |
1048 | + |
1049 | +static int fcache_read_extents(struct fcache_dev *fdev) |
1050 | +{ |
1051 | + unsigned int nr_extents = fdev->nr_extents; |
1052 | + int ret, extents, total_extents; |
1053 | + struct page *page; |
1054 | + sector_t index; |
1055 | + void *p; |
1056 | + |
1057 | + page = alloc_page(GFP_KERNEL); |
1058 | + if (unlikely(!page)) |
1059 | + return -ENOMEM; |
1060 | + |
1061 | + ret = 0; |
1062 | + total_extents = 0; |
1063 | + index = FCACHE_EXTENT_BLOCK; |
1064 | + while (nr_extents) { |
1065 | + ret = fcache_rw_page(fdev, index, page, READ); |
1066 | + if (ret) |
1067 | + break; |
1068 | + |
1069 | + p = page_address(page); |
1070 | + extents = fcache_parse_extents(fdev, p, nr_extents); |
1071 | + |
1072 | + if (extents < 0) { |
1073 | + ret = extents; |
1074 | + break; |
1075 | + } |
1076 | + |
1077 | + index++; |
1078 | + nr_extents -= extents; |
1079 | + total_extents += extents; |
1080 | + } |
1081 | + |
1082 | + __free_page(page); |
1083 | + |
1084 | + if (ret) |
1085 | + return ret; |
1086 | + |
1087 | + return total_extents; |
1088 | +} |
1089 | + |
1090 | +/* |
1091 | + * Read an existing fcache header from the device, and then proceed to |
1092 | + * reading and adding the extents to our prio tree. |
1093 | + */ |
1094 | +static int fcache_load_header(struct fcache_dev *fdev, int serial) |
1095 | +{ |
1096 | + struct fcache_header *header = NULL; |
1097 | + struct page *page; |
1098 | + int ret, wrong_serial = 0; |
1099 | + char b[BDEVNAME_SIZE]; |
1100 | + |
1101 | + page = alloc_page(GFP_HIGHUSER); |
1102 | + if (unlikely(!page)) |
1103 | + return -ENOMEM; |
1104 | + |
1105 | + ret = fcache_rw_page(fdev, FCACHE_HEADER_BLOCK, page, READ); |
1106 | + if (unlikely(ret)) |
1107 | + goto err; |
1108 | + |
1109 | + ret = -EINVAL; |
1110 | + header = kmap_atomic(page, KM_USER0); |
1111 | + if (header->magic != FCACHE_MAGIC) { |
1112 | + printk(KERN_ERR "fcache: bad magic %x\n", header->magic); |
1113 | + goto err; |
1114 | + } |
1115 | + if (header->version != FCACHE_VERSION) { |
1116 | + printk(KERN_ERR "fcache: bad version %d\n", header->version); |
1117 | + goto err; |
1118 | + } |
1119 | + if (strcmp(bdevname(fdev->fs_bdev, b), header->fs_dev)) { |
1120 | + printk(KERN_ERR "fcache: device mismatch (%s/%s)\n", b, |
1121 | + header->fs_dev); |
1122 | + goto err; |
1123 | + } |
1124 | + if (header->fs_start_sector != fdev->fs_start_sector || |
1125 | + header->fs_sectors != fdev->fs_sectors) { |
1126 | + printk(KERN_ERR "fcache: fs appears to have changed size\n"); |
1127 | + goto err; |
1128 | + } |
1129 | + |
1130 | + fdev->nr_extents = header->nr_extents; |
1131 | + fdev->max_extents = header->max_extents; |
1132 | + |
1133 | + /* |
1134 | + * Don't fail on out-of-date serial, just warn that the user needs |
1135 | + * to prime the cache again. Until then we'll just bypass the cache. |
1136 | + */ |
1137 | + if (header->serial != serial) { |
1138 | + printk(KERN_ERR "fcache: found serial %d, expected %d.\n", |
1139 | + header->serial, serial); |
1140 | + printk(KERN_ERR "fcache: reprime the cache!\n"); |
1141 | + wrong_serial = 1; |
1142 | + } |
1143 | + |
1144 | + fdev->serial = header->serial; |
1145 | + kunmap_atomic(header, KM_USER0); |
1146 | + __free_page(page); |
1147 | + |
1148 | + if (!wrong_serial) { |
1149 | + printk("fcache: header looks valid (extents=%lu, serial=%d)\n", fdev->nr_extents, fdev->serial); |
1150 | + |
1151 | + ret = fcache_read_extents(fdev); |
1152 | + printk("fcache: loaded %d extents\n", ret); |
1153 | + |
1154 | + /* |
1155 | + * If we don't find all the extents we require, fail. |
1156 | + */ |
1157 | + if (ret != fdev->nr_extents) { |
1158 | + fcache_free_prio_tree(fdev); |
1159 | + ret = -EINVAL; |
1160 | + } else |
1161 | + ret = 0; |
1162 | + } |
1163 | + |
1164 | + return ret; |
1165 | +err: |
1166 | + __free_page(page); |
1167 | + if (header) |
1168 | + kunmap_atomic(header, KM_USER0); |
1169 | + return ret; |
1170 | +} |
1171 | + |
1172 | +/* |
1173 | + * We use this range to decide when to log an io to the target device. |
1174 | + */ |
1175 | +static void fcache_fill_fs_size(struct fcache_dev *fdev) |
1176 | +{ |
1177 | + struct block_device *bdev = fdev->fs_bdev; |
1178 | + |
1179 | + /* |
1180 | + * Partition or whole device? |
1181 | + */ |
1182 | + if (bdev != bdev->bd_contains) { |
1183 | + struct hd_struct *p = bdev->bd_part; |
1184 | + |
1185 | + fdev->fs_start_sector = p->start_sect; |
1186 | + fdev->fs_sectors = p->nr_sects; |
1187 | + } else { |
1188 | + fdev->fs_start_sector = 0; |
1189 | + fdev->fs_sectors = bdev->bd_inode->i_size >> 9; |
1190 | + } |
1191 | +} |
1192 | + |
1193 | +static void fcache_fill_cache_size(struct fcache_dev *fdev) |
1194 | +{ |
1195 | + struct block_device *bdev = fdev->bdev; |
1196 | + |
1197 | + /* |
1198 | + * Partition or whole device? |
1199 | + */ |
1200 | + if (bdev != bdev->bd_contains) { |
1201 | + struct hd_struct *p = bdev->bd_part; |
1202 | + |
1203 | + fdev->cache_start_sector = p->start_sect; |
1204 | + fdev->cache_blocks = p->nr_sects >> BLOCK_SHIFT; |
1205 | + } else { |
1206 | + fdev->cache_start_sector = 0; |
1207 | + fdev->cache_blocks = bdev->bd_inode->i_size >> PAGE_SHIFT; |
1208 | + } |
1209 | +} |
1210 | + |
1211 | +/* |
1212 | + * This is a read request, check if we have that block. If we do, then |
1213 | + * just redirect. If not, pass it through. |
1214 | + */ |
1215 | +static int fcache_read_request(struct fcache_dev *fdev, request_queue_t *q, |
1216 | + struct bio *bio) |
1217 | +{ |
1218 | + struct fcache_extent *extents[MAX_FE]; |
1219 | + struct fcache_extent *fe; |
1220 | + int i, nr; |
1221 | + |
1222 | + /* |
1223 | + * Not there, redirect to original but schedule adding this extent |
1224 | + * to our list if we are priming. |
1225 | + */ |
1226 | + nr = fcache_lookup_extent(fdev, bio->bi_sector, bio->bi_size, extents); |
1227 | + if (!nr) { |
1228 | + if (fdev->priming && !fcache_add_extent(fdev, bio)) |
1229 | + return 0; |
1230 | + |
1231 | + fdev->misses++; |
1232 | + return fdev->mfn(q, bio); |
1233 | + } |
1234 | + |
1235 | + /* |
1236 | + * If range is at least as big, we use our cache. If not, cop out |
1237 | + * and just submit to real device. |
1238 | + */ |
1239 | + for (i = 0; i < nr; i++) { |
1240 | + sector_t end_fe, end_bi; |
1241 | + fe = extents[i]; |
1242 | + |
1243 | + end_fe = fe->fs_sector + (fe->fs_size >> 9); |
1244 | + end_bi = bio->bi_sector + (bio->bi_size >> 9); |
1245 | + |
1246 | + /* |
1247 | + * match! |
1248 | + */ |
1249 | + if (bio->bi_sector >= fe->fs_sector && end_bi <= end_fe) |
1250 | + break; |
1251 | + |
1252 | + fe = NULL; |
1253 | + } |
1254 | + |
1255 | + /* |
1256 | + * Nope, send to real device. |
1257 | + */ |
1258 | + if (!fe) { |
1259 | + fdev->misses++; |
1260 | + return fdev->mfn(q, bio); |
1261 | + } |
1262 | + |
1263 | + /* |
1264 | + * Perfect, adjust start offset if it isn't aligned. |
1265 | + */ |
1266 | + fdev->hits++; |
1267 | + fcache_bio_align(bio, fe); |
1268 | + |
1269 | + /* |
1270 | + * If we don't have to chop it up, just let generic_make_request() |
1271 | + * handle the stacking. Otherwise, return handled and pass to chopper. |
1272 | + */ |
1273 | + if (fdev->chop_ios) { |
1274 | + struct fcache_endio_data *fed; |
1275 | + |
1276 | + fed = mempool_alloc(fed_pool, GFP_NOIO); |
1277 | + |
1278 | + fed->fdev = fdev; |
1279 | + fed->cache_sector = bio->bi_sector; |
1280 | + fed->fs_size = bio->bi_size; |
1281 | + fed->bio = bio; |
1282 | + fed->io_error = 0; |
1283 | + fcache_io_chopper(fdev, fed, fcache_chop_read_endio, |
1284 | + fcache_chop_read_done, READ); |
1285 | + return 0; |
1286 | + } |
1287 | + |
1288 | + bio->bi_bdev = fdev->bdev; |
1289 | + return 1; |
1290 | +} |
1291 | + |
1292 | +/* |
1293 | + * If we are priming the cache, always add this block. If not, then we still |
1294 | + * need to overwrite this block if it's in our cache. |
1295 | + */ |
1296 | +static int fcache_write_request(struct fcache_dev *fdev, request_queue_t *q, |
1297 | + struct bio *bio) |
1298 | +{ |
1299 | + struct fcache_extent *extents[MAX_FE]; |
1300 | + struct fcache_extent *fe; |
1301 | + sector_t start = bio->bi_sector; |
1302 | + int i, nr; |
1303 | + |
1304 | +repeat: |
1305 | + nr = fcache_lookup_extent(fdev, bio->bi_sector, bio->bi_size, extents); |
1306 | + |
1307 | + /* |
1308 | + * Find out what to overwrite, if anything. |
1309 | + */ |
1310 | + for (i = 0; i < nr; i++) { |
1311 | + fe = extents[i]; |
1312 | + fdev->overwrites++; |
1313 | + fcache_overwrite_extent(fdev, fe, bio); |
1314 | + } |
1315 | + |
1316 | + /* |
1317 | + * If i == MAX_FE, there _may_ be more extents. Repeat lookup, start |
1318 | + * from the end of last request. |
1319 | + */ |
1320 | + if (i == MAX_FE) { |
1321 | + fe = extents[i - 1]; |
1322 | + start = fe->fs_sector + (fe->fs_size >> 9); |
1323 | + goto repeat; |
1324 | + } |
1325 | + |
1326 | + return fdev->mfn(q, bio); |
1327 | +} |
1328 | + |
1329 | +/* |
1330 | + * This is the only case where we resubmit an io to the device but don't |
1331 | + * want to count it as part of io we log. |
1332 | + */ |
1333 | +#define fcache_bio_seen(bio) ((bio)->bi_end_io == fcache_extent_endio) |
1334 | + |
1335 | +static int fcache_make_request(request_queue_t *q, struct bio *bio) |
1336 | +{ |
1337 | + struct fcache_dev *fdev = &fcache_dev; |
1338 | + |
1339 | + /* |
1340 | + * If it's in the sector range we are monitoring and the device isn't |
1341 | + * being shut down, then pass it on. Assume a bio doesn't span into |
1342 | + * the next partition, so don't bother accounting for size. |
1343 | + */ |
1344 | + if ((bio->bi_sector >= fdev->fs_start_sector) && |
1345 | + (bio->bi_sector < (fdev->fs_start_sector + fdev->fs_sectors)) && |
1346 | + !test_bit(FDEV_F_DOWN, &fdev->flags) && |
1347 | + !fcache_bio_seen(bio)) { |
1348 | + |
1349 | + fdev->ios[bio_data_dir(bio)]++; |
1350 | + |
1351 | + if (bio_data_dir(bio) == READ) |
1352 | + return fcache_read_request(fdev, q, bio); |
1353 | + |
1354 | + return fcache_write_request(fdev, q, bio); |
1355 | + } |
1356 | + |
1357 | + /* |
1358 | + * Pass through to original make_request_fn. |
1359 | + */ |
1360 | + return fdev->mfn(q, bio); |
1361 | +} |
1362 | + |
1363 | +/* |
1364 | + * Attach the cache device 'bdev' to 'fdev'. |
1365 | + */ |
1366 | +static int fcache_setup_dev(struct fcache_dev *fdev, |
1367 | + struct block_device *fs_bdev, |
1368 | + struct block_device *bdev, |
1369 | + int priming, int serial) |
1370 | +{ |
1371 | + request_queue_t *fs_q, *cache_q; |
1372 | + char b[BDEVNAME_SIZE]; |
1373 | + int ret; |
1374 | + |
1375 | + memset(fdev, 0, sizeof(*fdev)); |
1376 | + INIT_PRIO_TREE_ROOT(&fdev->prio_root); |
1377 | + spin_lock_init(&fdev->lock); |
1378 | + INIT_LIST_HEAD(&fdev->list); |
1379 | + INIT_WORK(&fdev->work, fcache_work, fdev); |
1380 | + fdev->priming = priming; |
1381 | + fdev->fs_bdev = fs_bdev; |
1382 | + fdev->bdev = bdev; |
1383 | + |
1384 | + ret = -EINVAL; |
1385 | + |
1386 | + fs_q = bdev_get_queue(fs_bdev); |
1387 | + cache_q = bdev_get_queue(bdev); |
1388 | + if (!fs_q || !cache_q) |
1389 | + goto out; |
1390 | + |
1391 | + /* |
1392 | + * Chop up outgoing ios, if the target is a different queue. We could |
1393 | + * look closer at limits, but it's fragile and pretty pointless. |
1394 | + */ |
1395 | + if (fs_q != cache_q) |
1396 | + fdev->chop_ios = 1; |
1397 | + |
1398 | + ret = bd_claim(bdev, fcache_setup_dev); |
1399 | + if (ret < 0) |
1400 | + goto out; |
1401 | + |
1402 | + ret = block_size(bdev); |
1403 | + if (ret != PAGE_SIZE) { |
1404 | + fdev->old_bs = ret; |
1405 | + ret = set_blocksize(bdev, PAGE_SIZE); |
1406 | + if (ret < 0) |
1407 | + goto out_release; |
1408 | + } else |
1409 | + ret = 0; |
1410 | + |
1411 | + fcache_fill_cache_size(fdev); |
1412 | + fcache_fill_fs_size(fdev); |
1413 | + |
1414 | + if (priming) { |
1415 | + fdev->serial = serial; |
1416 | + ret = fcache_write_new_header(fdev); |
1417 | + } else |
1418 | + ret = fcache_load_header(fdev, serial); |
1419 | + |
1420 | + if (!ret) { |
1421 | + printk("fcache: %s opened successfully (%spriming)\n", |
1422 | + bdevname(bdev, b), |
1423 | + priming ? "" : "not "); |
1424 | + return 0; |
1425 | + } |
1426 | + |
1427 | +out_release: |
1428 | + bd_release(fdev->bdev); |
1429 | +out: |
1430 | + blkdev_put(fdev->bdev); |
1431 | + fdev->bdev = NULL; |
1432 | + return ret; |
1433 | +} |
1434 | + |
1435 | +/* |
1436 | + * Return fdev->bdev to its original state. |
1437 | + */ |
1438 | +static void fcache_shutdown_dev(struct fcache_dev *fdev, |
1439 | + struct block_device *bdev) |
1440 | +{ |
1441 | + if (fdev->bdev) { |
1442 | + if (fdev->mfn) { |
1443 | + request_queue_t *q = bdev_get_queue(bdev); |
1444 | + |
1445 | + (void) xchg(&q->make_request_fn, fdev->mfn); |
1446 | + } |
1447 | + sync_blockdev(fdev->bdev); |
1448 | + if (fdev->old_bs) |
1449 | + set_blocksize(fdev->bdev, fdev->old_bs); |
1450 | + |
1451 | + bd_release(fdev->bdev); |
1452 | + blkdev_put(fdev->bdev); |
1453 | + fdev->bdev = NULL; |
1454 | + INIT_PRIO_TREE_ROOT(&fdev->prio_root); |
1455 | + } |
1456 | +} |
1457 | + |
1458 | +/* |
1459 | + * bdev is the file system device, cache_dev is the device we want to store |
1460 | + * the cache on. |
1461 | + */ |
1462 | +int fcache_dev_open(struct block_device *bdev, unsigned long cache_dev, |
1463 | + int priming, int serial) |
1464 | +{ |
1465 | + struct block_device *fcache_bdev; |
1466 | + request_queue_t *q; |
1467 | + int ret; |
1468 | + |
1469 | + if (disable) |
1470 | + return 0; |
1471 | + if (fcache_dev.bdev) |
1472 | + return -EBUSY; |
1473 | + |
1474 | + fcache_bdev = open_by_devnum(cache_dev, FMODE_READ|FMODE_WRITE); |
1475 | + if (IS_ERR(fcache_bdev)) |
1476 | + return PTR_ERR(fcache_bdev); |
1477 | + |
1478 | + ret = fcache_setup_dev(&fcache_dev, bdev, fcache_bdev, priming, serial); |
1479 | + if (ret) |
1480 | + return ret; |
1481 | + |
1482 | + q = bdev_get_queue(bdev); |
1483 | + fcache_dev.mfn = xchg(&q->make_request_fn, fcache_make_request); |
1484 | + return 0; |
1485 | +} |
1486 | + |
1487 | +EXPORT_SYMBOL(fcache_dev_open); |
1488 | + |
1489 | +void fcache_dev_close(struct block_device *bdev, int serial) |
1490 | +{ |
1491 | + struct fcache_dev *fdev = &fcache_dev; |
1492 | + |
1493 | + if (disable) |
1494 | + return; |
1495 | + |
1496 | + if (!fdev->bdev) |
1497 | + return; |
1498 | + |
1499 | + printk("fcache: ios r/w %u/%u, hits %u, misses %u, overwrites %u\n", |
1500 | + fdev->ios[0], fdev->ios[1], fdev->hits, |
1501 | + fdev->misses, fdev->overwrites); |
1502 | + fdev->serial = serial; |
1503 | + |
1504 | + sync_blockdev(bdev); |
1505 | + set_bit(FDEV_F_DOWN, &fdev->flags); |
1506 | + |
1507 | + if (fdev->priming) |
1508 | + fcache_write_extents(fdev); |
1509 | + |
1510 | + fcache_write_header(fdev); |
1511 | + fcache_free_prio_tree(fdev); |
1512 | + fcache_shutdown_dev(fdev, bdev); |
1513 | +} |
1514 | + |
1515 | +EXPORT_SYMBOL(fcache_dev_close); |
1516 | + |
1517 | +static int fcache_init(void) |
1518 | +{ |
1519 | + fcache_slab = kmem_cache_create("fcache", sizeof(struct fcache_extent), |
1520 | + 0, 0, NULL, NULL); |
1521 | + if (!fcache_slab) |
1522 | + return -ENOMEM; |
1523 | + |
1524 | + fcache_fed_slab = kmem_cache_create("fcache_fed", |
1525 | + sizeof(struct fcache_endio_data), 0, 0, |
1526 | + NULL, NULL); |
1527 | + if (!fcache_fed_slab) { |
1528 | + kmem_cache_destroy(fcache_slab); |
1529 | + return -ENOMEM; |
1530 | + } |
1531 | + |
1532 | + fed_pool = mempool_create_slab_pool(1, fcache_fed_slab); |
1533 | + if (!fed_pool) { |
1534 | + kmem_cache_destroy(fcache_slab); |
1535 | + kmem_cache_destroy(fcache_fed_slab); |
1536 | + return -ENOMEM; |
1537 | + } |
1538 | + |
1539 | + fcache_workqueue = create_singlethread_workqueue("fcached"); |
1540 | + if (!fcache_workqueue) |
1541 | + panic("fcache: failed to create fcached\n"); |
1542 | + |
1543 | + return 0; |
1544 | +} |
1545 | + |
1546 | +static void fcache_exit(void) |
1547 | +{ |
1548 | + destroy_workqueue(fcache_workqueue); |
1549 | + kmem_cache_destroy(fcache_slab); |
1550 | + kmem_cache_destroy(fcache_fed_slab); |
1551 | + mempool_destroy(fed_pool); |
1552 | +} |
1553 | + |
1554 | +MODULE_AUTHOR("Jens Axboe <axboe@suse.de>"); |
1555 | +MODULE_LICENSE("GPL"); |
1556 | + |
1557 | +module_init(fcache_init); |
1558 | +module_exit(fcache_exit); |
1559 | Index: linux-ck-dev/fs/ext3/super.c |
1560 | =================================================================== |
1561 | --- linux-ck-dev.orig/fs/ext3/super.c 2006-06-18 15:20:10.000000000 +1000 |
1562 | +++ linux-ck-dev/fs/ext3/super.c 2006-06-18 15:25:27.000000000 +1000 |
1563 | @@ -384,11 +384,43 @@ static void dump_orphan_list(struct supe |
1564 | } |
1565 | } |
1566 | |
1567 | +extern int fcache_dev_open(struct block_device *, unsigned long, int, int); |
1568 | +extern void fcache_dev_close(struct block_device *, int); |
1569 | + |
1570 | +static void ext3_close_fcache(struct super_block *sb) |
1571 | +{ |
1572 | + struct ext3_sb_info *sbi = EXT3_SB(sb); |
1573 | + struct ext3_super_block *es = sbi->s_es; |
1574 | + int serial = le16_to_cpu(es->s_mnt_count); |
1575 | + |
1576 | + fcache_dev_close(sb->s_bdev, serial); |
1577 | +} |
1578 | + |
1579 | +static int ext3_open_fcache(struct super_block *sb, unsigned long cachedev) |
1580 | +{ |
1581 | + struct ext3_sb_info *sbi = EXT3_SB(sb); |
1582 | + struct ext3_super_block *es = sbi->s_es; |
1583 | + int priming = test_opt(sb, FCACHEPRIME); |
1584 | + int serial = le16_to_cpu(es->s_mnt_count); |
1585 | + int ret; |
1586 | + |
1587 | + ret = fcache_dev_open(sb->s_bdev, cachedev, priming, serial); |
1588 | + if (!ret) { |
1589 | + set_opt(sbi->s_mount_opt, FCACHE); |
1590 | + return 0; |
1591 | + } |
1592 | + |
1593 | + printk(KERN_ERR "ext3: failed to open fcache (err=%d)\n", ret); |
1594 | + return ret; |
1595 | +} |
1596 | + |
1597 | static void ext3_put_super (struct super_block * sb) |
1598 | { |
1599 | struct ext3_sb_info *sbi = EXT3_SB(sb); |
1600 | struct ext3_super_block *es = sbi->s_es; |
1601 | - int i; |
1602 | + int i, has_fcache; |
1603 | + |
1604 | + has_fcache = test_opt(sb, FCACHE); |
1605 | |
1606 | ext3_xattr_put_super(sb); |
1607 | journal_destroy(sbi->s_journal); |
1608 | @@ -431,6 +463,8 @@ static void ext3_put_super (struct super |
1609 | invalidate_bdev(sbi->journal_bdev, 0); |
1610 | ext3_blkdev_remove(sbi); |
1611 | } |
1612 | + if (has_fcache) |
1613 | + ext3_close_fcache(sb); |
1614 | sb->s_fs_info = NULL; |
1615 | kfree(sbi); |
1616 | return; |
1617 | @@ -635,7 +669,7 @@ enum { |
1618 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1619 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
1620 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
1621 | - Opt_grpquota |
1622 | + Opt_grpquota, Opt_fcache_dev, Opt_fcache_prime, |
1623 | }; |
1624 | |
1625 | static match_table_t tokens = { |
1626 | @@ -684,6 +718,8 @@ static match_table_t tokens = { |
1627 | {Opt_quota, "quota"}, |
1628 | {Opt_usrquota, "usrquota"}, |
1629 | {Opt_barrier, "barrier=%u"}, |
1630 | + {Opt_fcache_dev, "fcache_dev=%s"}, |
1631 | + {Opt_fcache_prime, "fcache_prime=%u"}, |
1632 | {Opt_err, NULL}, |
1633 | {Opt_resize, "resize"}, |
1634 | }; |
1635 | @@ -710,6 +746,7 @@ static unsigned long get_sb_block(void * |
1636 | |
1637 | static int parse_options (char *options, struct super_block *sb, |
1638 | unsigned long *inum, unsigned long *journal_devnum, |
1639 | + unsigned long *fcache_devnum, |
1640 | unsigned long *n_blocks_count, int is_remount) |
1641 | { |
1642 | struct ext3_sb_info *sbi = EXT3_SB(sb); |
1643 | @@ -1012,6 +1049,29 @@ clear_qf_name: |
1644 | case Opt_nobh: |
1645 | set_opt(sbi->s_mount_opt, NOBH); |
1646 | break; |
1647 | + case Opt_fcache_dev: { |
1648 | + int maj, min; |
1649 | + char *p, *pm; |
1650 | + |
1651 | + if (!fcache_devnum) |
1652 | + break; |
1653 | + p = match_strdup(&args[0]); |
1654 | + if (!p) |
1655 | + return 0; |
1656 | + maj = simple_strtol(p, &pm, 10); |
1657 | + min = simple_strtol(pm + 1, NULL, 10); |
1658 | + *fcache_devnum = maj << MINORBITS | min; |
1659 | + kfree(p); |
1660 | + break; |
1661 | + } |
1662 | + case Opt_fcache_prime: |
1663 | + if (match_int(&args[0], &option)) |
1664 | + return 0; |
1665 | + if (option) |
1666 | + set_opt(sbi->s_mount_opt, FCACHEPRIME); |
1667 | + else |
1668 | + clear_opt(sbi->s_mount_opt, FCACHEPRIME); |
1669 | + break; |
1670 | default: |
1671 | printk (KERN_ERR |
1672 | "EXT3-fs: Unrecognized mount option \"%s\" " |
1673 | @@ -1346,6 +1406,7 @@ static int ext3_fill_super (struct super |
1674 | unsigned long offset = 0; |
1675 | unsigned long journal_inum = 0; |
1676 | unsigned long journal_devnum = 0; |
1677 | + unsigned long fcache_devnum = 0; |
1678 | unsigned long def_mount_opts; |
1679 | struct inode *root; |
1680 | int blocksize; |
1681 | @@ -1353,6 +1414,7 @@ static int ext3_fill_super (struct super |
1682 | int db_count; |
1683 | int i; |
1684 | int needs_recovery; |
1685 | + int fcache = 0; |
1686 | __le32 features; |
1687 | |
1688 | sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); |
1689 | @@ -1427,7 +1489,7 @@ static int ext3_fill_super (struct super |
1690 | set_opt(sbi->s_mount_opt, RESERVATION); |
1691 | |
1692 | if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, |
1693 | - NULL, 0)) |
1694 | + &fcache_devnum, NULL, 0)) |
1695 | goto failed_mount; |
1696 | |
1697 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
1698 | @@ -1651,6 +1713,9 @@ static int ext3_fill_super (struct super |
1699 | goto failed_mount2; |
1700 | } |
1701 | |
1702 | + if (fcache_devnum) |
1703 | + fcache = ext3_open_fcache(sb, fcache_devnum); |
1704 | + |
1705 | /* We have now updated the journal if required, so we can |
1706 | * validate the data journaling mode. */ |
1707 | switch (test_opt(sb, DATA_FLAGS)) { |
1708 | @@ -1740,6 +1805,8 @@ cantfind_ext3: |
1709 | goto failed_mount; |
1710 | |
1711 | failed_mount3: |
1712 | + if (!fcache) |
1713 | + ext3_close_fcache(sb); |
1714 | journal_destroy(sbi->s_journal); |
1715 | failed_mount2: |
1716 | for (i = 0; i < db_count; i++) |
1717 | @@ -2205,6 +2272,7 @@ static int ext3_remount (struct super_bl |
1718 | struct ext3_sb_info *sbi = EXT3_SB(sb); |
1719 | unsigned long n_blocks_count = 0; |
1720 | unsigned long old_sb_flags; |
1721 | + unsigned long fcache_devnum = 0; |
1722 | struct ext3_mount_options old_opts; |
1723 | int err; |
1724 | #ifdef CONFIG_QUOTA |
1725 | @@ -2226,7 +2294,7 @@ static int ext3_remount (struct super_bl |
1726 | /* |
1727 | * Allow the "check" option to be passed as a remount option. |
1728 | */ |
1729 | - if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { |
1730 | + if (!parse_options(data, sb, NULL, NULL, &fcache_devnum, &n_blocks_count, 1)) { |
1731 | err = -EINVAL; |
1732 | goto restore_opts; |
1733 | } |
1734 | @@ -2241,6 +2309,11 @@ static int ext3_remount (struct super_bl |
1735 | |
1736 | ext3_init_journal_params(sb, sbi->s_journal); |
1737 | |
1738 | + if (fcache_devnum) { |
1739 | + ext3_close_fcache(sb); |
1740 | + ext3_open_fcache(sb, fcache_devnum); |
1741 | + } |
1742 | + |
1743 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || |
1744 | n_blocks_count > le32_to_cpu(es->s_blocks_count)) { |
1745 | if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { |
1746 | Index: linux-ck-dev/include/linux/bio.h |
1747 | =================================================================== |
1748 | --- linux-ck-dev.orig/include/linux/bio.h 2006-06-18 15:20:10.000000000 +1000 |
1749 | +++ linux-ck-dev/include/linux/bio.h 2006-06-18 15:25:27.000000000 +1000 |
1750 | @@ -124,6 +124,7 @@ struct bio { |
1751 | #define BIO_BOUNCED 5 /* bio is a bounce bio */ |
1752 | #define BIO_USER_MAPPED 6 /* contains user pages */ |
1753 | #define BIO_EOPNOTSUPP 7 /* not supported */ |
1754 | +#define BIO_NOMERGE 8 /* bio not mergeable */ |
1755 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) |
1756 | |
1757 | /* |
1758 | @@ -179,6 +180,14 @@ struct bio { |
1759 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) |
1760 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) |
1761 | |
1762 | +static inline int bio_mergeable(struct bio *bio) |
1763 | +{ |
1764 | + if (!bio_barrier(bio) && !bio->bi_idx && !bio_flagged(bio, BIO_NOMERGE)) |
1765 | + return 1; |
1766 | + |
1767 | + return 0; |
1768 | +} |
1769 | + |
1770 | /* |
1771 | * will die |
1772 | */ |
1773 | Index: linux-ck-dev/include/linux/ext3_fs.h |
1774 | =================================================================== |
1775 | --- linux-ck-dev.orig/include/linux/ext3_fs.h 2006-06-18 15:20:10.000000000 +1000 |
1776 | +++ linux-ck-dev/include/linux/ext3_fs.h 2006-06-18 15:25:27.000000000 +1000 |
1777 | @@ -376,6 +376,8 @@ struct ext3_inode { |
1778 | #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
1779 | #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
1780 | #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
1781 | +#define EXT3_MOUNT_FCACHE 0x400000 /* using fcache */ |
1782 | +#define EXT3_MOUNT_FCACHEPRIME 0x800000 /* priming fcache */ |
1783 | |
1784 | /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ |
1785 | #ifndef _LINUX_EXT2_FS_H |
1786 | @@ -847,6 +849,18 @@ extern struct inode_operations ext3_spec |
1787 | extern struct inode_operations ext3_symlink_inode_operations; |
1788 | extern struct inode_operations ext3_fast_symlink_inode_operations; |
1789 | |
1790 | +#ifndef CONFIG_BLK_FCACHE |
1791 | +static inline int fcache_dev_open(struct block_device *bdev, |
1792 | + unsigned long cache_dev, int priming, int serial) |
1793 | +{ |
1794 | + return -ENODEV; |
1795 | +} |
1796 | + |
1797 | +static inline void fcache_dev_close(struct block_device *bdev, int serial) |
1798 | +{ |
1799 | +} |
1801 | +#endif /* CONFIG_BLK_FCACHE */ |
1802 | |
1803 | #endif /* __KERNEL__ */ |
1804 |