Contents of /alx-src/tags/kernel26-2.6.12-alx-r9/mm/highmem.c
Revision 630
Wed Mar 4 11:03:09 2009 UTC by niro
File MIME type: text/plain
File size: 14010 byte(s)
Tag kernel26-2.6.12-alx-r9
/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <asm/tlbflush.h>

static mempool_t *page_pool, *isa_page_pool;

static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
{
	unsigned int gfp = gfp_mask | (unsigned int) (long) data;

	return alloc_page(gfp);
}

static void page_pool_free(void *page, void *data)
{
	__free_page(page);
}

/*
 * Virtual_count is not a pure "count".
 *  0 means that it is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
#ifdef CONFIG_HIGHMEM
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

pte_t * pkmap_page_table;

static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

static void flush_all_zero_pkmaps(void)
{
	int i;

	flush_cache_kmaps();

	for (i = 0; i < LAST_PKMAP; i++) {
		struct page *page;

		/*
		 * zero means we don't have anything to do,
		 * >1 means that it is still in use. Only
		 * a count of 1 means that it is free but
		 * needs to be unmapped
		 */
		if (pkmap_count[i] != 1)
			continue;
		pkmap_count[i] = 0;

		/* sanity check */
		if (pte_none(pkmap_page_table[i]))
			BUG();

		/*
		 * Don't need an atomic fetch-and-clear op here;
		 * no-one has the page mapped, and cannot get at
		 * its virtual address (and hence PTE) without first
		 * getting the kmap_lock (which is held here).
		 * So no dangers, even with speculative execution.
		 */
		page = pte_page(pkmap_page_table[i]);
		pte_clear(&init_mm, (unsigned long)page_address(page),
			  &pkmap_page_table[i]);

		set_page_address(page, NULL);
	}
	flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}

static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;

start:
	count = LAST_PKMAP;
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
		if (!last_pkmap_nr) {
			flush_all_zero_pkmaps();
			count = LAST_PKMAP;
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep for somebody else to unmap their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);

			__set_current_state(TASK_UNINTERRUPTIBLE);
			add_wait_queue(&pkmap_map_wait, &wait);
			spin_unlock(&kmap_lock);
			schedule();
			remove_wait_queue(&pkmap_map_wait, &wait);
			spin_lock(&kmap_lock);

			/* Somebody else might have mapped it while we slept */
			if (page_address(page))
				return (unsigned long)page_address(page);

			/* Re-start */
			goto start;
		}
	}
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte_at(&init_mm, vaddr,
		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	pkmap_count[last_pkmap_nr] = 1;
	set_page_address(page, (void *)vaddr);

	return vaddr;
}

void fastcall *kmap_high(struct page *page)
{
	unsigned long vaddr;

	/*
	 * For highmem pages, we can't trust "virtual" until
	 * after we have the lock.
	 *
	 * We cannot call this from interrupts, as it may block
	 */
	spin_lock(&kmap_lock);
	vaddr = (unsigned long)page_address(page);
	if (!vaddr)
		vaddr = map_new_virtual(page);
	pkmap_count[PKMAP_NR(vaddr)]++;
	if (pkmap_count[PKMAP_NR(vaddr)] < 2)
		BUG();
	spin_unlock(&kmap_lock);
	return (void*) vaddr;
}

EXPORT_SYMBOL(kmap_high);

void fastcall kunmap_high(struct page *page)
{
	unsigned long vaddr;
	unsigned long nr;
	int need_wakeup;

	spin_lock(&kmap_lock);
	vaddr = (unsigned long)page_address(page);
	if (!vaddr)
		BUG();
	nr = PKMAP_NR(vaddr);

	/*
	 * A count must never go down to zero
	 * without a TLB flush!
	 */
	need_wakeup = 0;
	switch (--pkmap_count[nr]) {
	case 0:
		BUG();
	case 1:
		/*
		 * Avoid an unnecessary wake_up() function call.
		 * The common case is pkmap_count[] == 1, but
		 * no waiters.
		 * The tasks queued in the wait-queue are guarded
		 * by both the lock in the wait-queue-head and by
		 * the kmap_lock.  As the kmap_lock is held here,
		 * no need for the wait-queue-head's lock.  Simply
		 * test if the queue is empty.
		 */
		need_wakeup = waitqueue_active(&pkmap_map_wait);
	}
	spin_unlock(&kmap_lock);

	/* do wake-up, if needed, race-free outside of the spin lock */
	if (need_wakeup)
		wake_up(&pkmap_map_wait);
}

EXPORT_SYMBOL(kunmap_high);
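
/*
 * Illustrative sketch, not part of the original file: callers normally
 * do not use kmap_high()/kunmap_high() directly, but go through the
 * kmap()/kunmap() wrappers in <linux/highmem.h>, which short-circuit to
 * page_address() for lowmem pages. The helper below (zero_highpage_demo,
 * a hypothetical name) shows the canonical pattern: map, touch the page
 * through the returned kernel virtual address, unmap. kmap() may sleep
 * waiting for a pkmap slot, so this is only valid in process context.
 */
static inline void zero_highpage_demo(struct page *page)
{
	void *vaddr = kmap(page);	/* pins a pkmap slot until kunmap() */

	memset(vaddr, 0, PAGE_SIZE);
	kunmap(page);			/* drops the pkmap_count reference */
}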

#define POOL_SIZE	64

static __init int init_emergency_pool(void)
{
	struct sysinfo i;
	si_meminfo(&i);
	si_swapinfo(&i);

	if (!i.totalhigh)
		return 0;

	page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
	if (!page_pool)
		BUG();
	printk("highmem bounce pool size: %d pages\n", POOL_SIZE);

	return 0;
}

__initcall(init_emergency_pool);

/*
 * highmem version, map in to vec
 */
static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
{
	unsigned long flags;
	unsigned char *vto;

	local_irq_save(flags);
	vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
	kunmap_atomic(vto, KM_BOUNCE_READ);
	local_irq_restore(flags);
}
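
/*
 * Illustrative sketch, not part of the original file: kmap_atomic(), as
 * used in bounce_copy_vec() above, is the interrupt-safe counterpart of
 * kmap(). It maps the page into a per-CPU fixmap slot, never sleeps, and
 * must be paired with kunmap_atomic() on the same CPU; preemption stays
 * disabled for the duration of the mapping. The helper name
 * copy_from_high_page_demo is hypothetical.
 */
static inline void copy_from_high_page_demo(void *dst, struct page *page,
					    size_t len)
{
	char *vsrc = kmap_atomic(page, KM_USER0);	/* per-CPU slot */

	memcpy(dst, vsrc, len);
	kunmap_atomic(vsrc, KM_USER0);
}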

#else /* CONFIG_HIGHMEM */

#define bounce_copy_vec(to, vfrom)	\
	memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)

#endif

#define ISA_POOL_SIZE	16

/*
 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
 * as the max address, so check if the pool has already been created.
 */
int init_emergency_isa_pool(void)
{
	if (isa_page_pool)
		return 0;

	isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
	if (!isa_page_pool)
		BUG();

	printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
	return 0;
}
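
/*
 * Illustrative sketch, not part of the original file: how the bounce
 * code consumes these pools. mempool_alloc() first tries the underlying
 * allocator (page_pool_alloc above) and only dips into the preallocated
 * reserve when that fails, so forward progress is guaranteed even under
 * memory pressure; every page taken must eventually be returned with
 * mempool_free(). The _demo function names are hypothetical.
 */
static struct page *grab_isa_bounce_page_demo(unsigned int gfp_mask)
{
	/* falls back to the preallocated reserve under memory pressure */
	return mempool_alloc(isa_page_pool, gfp_mask);
}

static void put_isa_bounce_page_demo(struct page *page)
{
	/* refills the reserve first, then hands pages back to the VM */
	mempool_free(page, isa_page_pool);
}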

/*
 * Simple bounce buffer support for highmem pages. Depending on the
 * queue gfp mask set, *to may or may not be a highmem page. kmap it
 * always, it will do the Right Thing
 */
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
	unsigned char *vfrom;
	struct bio_vec *tovec, *fromvec;
	int i;

	__bio_for_each_segment(tovec, to, i, 0) {
		fromvec = from->bi_io_vec + i;

		/*
		 * not bounced
		 */
		if (tovec->bv_page == fromvec->bv_page)
			continue;

		/*
		 * fromvec->bv_offset and fromvec->bv_len might have been
		 * modified by the block layer, so use the original copy,
		 * bounce_copy_vec already uses tovec->bv_len
		 */
		vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;

		flush_dcache_page(tovec->bv_page);
		bounce_copy_vec(tovec, vfrom);
	}
}

static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
{
	struct bio *bio_orig = bio->bi_private;
	struct bio_vec *bvec, *org_vec;
	int i;

	if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
		set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);

	/*
	 * free up bounce indirect pages used
	 */
	__bio_for_each_segment(bvec, bio, i, 0) {
		org_vec = bio_orig->bi_io_vec + i;
		if (bvec->bv_page == org_vec->bv_page)
			continue;

		mempool_free(bvec->bv_page, pool);
		dec_page_state(nr_bounce);
	}

	bio_endio(bio_orig, bio_orig->bi_size, err);
	bio_put(bio);
}

static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
{
	if (bio->bi_size)
		return 1;

	bounce_end_io(bio, page_pool, err);
	return 0;
}

static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
{
	if (bio->bi_size)
		return 1;

	bounce_end_io(bio, isa_page_pool, err);
	return 0;
}

static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
{
	struct bio *bio_orig = bio->bi_private;

	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		copy_to_high_bio_irq(bio_orig, bio);

	bounce_end_io(bio, pool, err);
}

static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
{
	if (bio->bi_size)
		return 1;

	__bounce_end_io_read(bio, page_pool, err);
	return 0;
}

static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
{
	if (bio->bi_size)
		return 1;

	__bounce_end_io_read(bio, isa_page_pool, err);
	return 0;
}

static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
			       mempool_t *pool)
{
	struct page *page;
	struct bio *bio = NULL;
	int i, rw = bio_data_dir(*bio_orig);
	struct bio_vec *to, *from;

	bio_for_each_segment(from, *bio_orig, i) {
		page = from->bv_page;

		/*
		 * is destination page below bounce pfn?
		 */
		if (page_to_pfn(page) < q->bounce_pfn)
			continue;

		/*
		 * irk, bounce it
		 */
		if (!bio)
			bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);

		to = bio->bi_io_vec + i;

		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
		to->bv_len = from->bv_len;
		to->bv_offset = from->bv_offset;
		inc_page_state(nr_bounce);

		if (rw == WRITE) {
			char *vto, *vfrom;

			flush_dcache_page(from->bv_page);
			vto = page_address(to->bv_page) + to->bv_offset;
			vfrom = kmap(from->bv_page) + from->bv_offset;
			memcpy(vto, vfrom, to->bv_len);
			kunmap(from->bv_page);
		}
	}

	/*
	 * no pages bounced
	 */
	if (!bio)
		return;

	/*
	 * at least one page was bounced, fill in possible non-highmem
	 * pages
	 */
	__bio_for_each_segment(from, *bio_orig, i, 0) {
		to = bio_iovec_idx(bio, i);
		if (!to->bv_page) {
			to->bv_page = from->bv_page;
			to->bv_len = from->bv_len;
			to->bv_offset = from->bv_offset;
		}
	}

	bio->bi_bdev = (*bio_orig)->bi_bdev;
	bio->bi_flags |= (1 << BIO_BOUNCED);
	bio->bi_sector = (*bio_orig)->bi_sector;
	bio->bi_rw = (*bio_orig)->bi_rw;

	bio->bi_vcnt = (*bio_orig)->bi_vcnt;
	bio->bi_idx = (*bio_orig)->bi_idx;
	bio->bi_size = (*bio_orig)->bi_size;

	if (pool == page_pool) {
		bio->bi_end_io = bounce_end_io_write;
		if (rw == READ)
			bio->bi_end_io = bounce_end_io_read;
	} else {
		bio->bi_end_io = bounce_end_io_write_isa;
		if (rw == READ)
			bio->bi_end_io = bounce_end_io_read_isa;
	}

	bio->bi_private = *bio_orig;
	*bio_orig = bio;
}

void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
{
	mempool_t *pool;

	/*
	 * for non-isa bounce case, just check if the bounce pfn is equal
	 * to or bigger than the highest pfn in the system -- in that case,
	 * don't waste time iterating over bio segments
	 */
	if (!(q->bounce_gfp & GFP_DMA)) {
		if (q->bounce_pfn >= blk_max_pfn)
			return;
		pool = page_pool;
	} else {
		BUG_ON(!isa_page_pool);
		pool = isa_page_pool;
	}

	/*
	 * slow path
	 */
	__blk_queue_bounce(q, bio_orig, pool);
}

EXPORT_SYMBOL(blk_queue_bounce);
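
/*
 * Illustrative sketch, not part of the original file: blk_queue_bounce()
 * is invoked from the block layer's request path after a driver has
 * declared its DMA reach with blk_queue_bounce_limit(). A driver limited
 * to ISA-style 24-bit DMA would set things up roughly as below; the
 * _demo function names are hypothetical.
 */
static void setup_isa_queue_demo(request_queue_t *q)
{
	/* pages above the 16MB boundary get bounced via isa_page_pool */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
}

static void submit_demo(request_queue_t *q, struct bio *bio)
{
	/* may replace bio with a bounced clone; completion copies back */
	blk_queue_bounce(q, &bio);
	/* ... queue the (possibly substituted) bio to the hardware ... */
}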

#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
	struct page *page;
	void *virtual;
	struct list_head list;
};

/*
 * page_address_map freelist, allocated from page_address_maps.
 */
static struct list_head page_address_pool;	/* freelist */
static spinlock_t pool_lock;			/* protects page_address_pool */

/*
 * Hash table bucket
 */
static struct page_address_slot {
	struct list_head lh;			/* List of page_address_maps */
	spinlock_t lock;			/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];

static struct page_address_slot *page_slot(struct page *page)
{
	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}

void *page_address(struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	ret = NULL;
	spin_lock_irqsave(&pas->lock, flags);
	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				goto done;
			}
		}
	}
done:
	spin_unlock_irqrestore(&pas->lock, flags);
	return ret;
}

EXPORT_SYMBOL(page_address);

void set_page_address(struct page *page, void *virtual)
{
	unsigned long flags;
	struct page_address_slot *pas;
	struct page_address_map *pam;

	BUG_ON(!PageHighMem(page));

	pas = page_slot(page);
	if (virtual) {		/* Add */
		BUG_ON(list_empty(&page_address_pool));

		spin_lock_irqsave(&pool_lock, flags);
		pam = list_entry(page_address_pool.next,
				 struct page_address_map, list);
		list_del(&pam->list);
		spin_unlock_irqrestore(&pool_lock, flags);

		pam->page = page;
		pam->virtual = virtual;

		spin_lock_irqsave(&pas->lock, flags);
		list_add_tail(&pam->list, &pas->lh);
		spin_unlock_irqrestore(&pas->lock, flags);
	} else {		/* Remove */
		spin_lock_irqsave(&pas->lock, flags);
		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				list_del(&pam->list);
				spin_unlock_irqrestore(&pas->lock, flags);
				spin_lock_irqsave(&pool_lock, flags);
				list_add_tail(&pam->list, &page_address_pool);
				spin_unlock_irqrestore(&pool_lock, flags);
				goto done;
			}
		}
		spin_unlock_irqrestore(&pas->lock, flags);
	}
done:
	return;
}

static struct page_address_map page_address_maps[LAST_PKMAP];

void __init page_address_init(void)
{
	int i;

	INIT_LIST_HEAD(&page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
		list_add(&page_address_maps[i].list, &page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
		INIT_LIST_HEAD(&page_address_htable[i].lh);
		spin_lock_init(&page_address_htable[i].lock);
	}
	spin_lock_init(&pool_lock);
}
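
/*
 * Illustrative sketch, not part of the original file: the lookup above
 * seen from the caller's side. For a lowmem page the answer is a
 * constant-time address computation; for a highmem page it is a hash
 * walk that only succeeds while a kmap of the page is live. The helper
 * name describe_mapping_demo is hypothetical.
 */
static void describe_mapping_demo(struct page *page)
{
	void *vaddr = page_address(page);	/* hash lookup for highmem */

	if (vaddr)
		printk("page %p mapped at %p\n", page, vaddr);
	else
		printk("page %p is highmem and not currently kmapped\n", page);
}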

#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */