Annotation of /trunk/kernel26-magellan/patches-2.6.17-r6/0017-2.6.17-mm-swap_prefetch-32.patch
Revision 105
Sun Mar 11 16:17:56 2007 UTC by niro
File size: 31282 byte(s)
2.6.17-magellan-r6

Implement swap prefetching when the vm is relatively idle and there is free
ram available. The code is based on some preliminary code by Thomas
Schlichter.

This stores swapped entries in a list ordered by most recent use and in a
radix tree. It starts a low priority kernel thread running at nice 19 to do
the prefetching at a later stage.

Once pages have been added to the swapped list, a timer is started and the
conditions suitable for prefetching swap pages are tested every 5 seconds.
Suitable conditions are defined as no pages being swapped in or out and no
watermark tests failing. Significant amounts of dirtied ram, or changes in
free ram that represent disk writes or reads, also prevent prefetching.

It then checks for spare ram, looking for at least 3 * pages_high free pages
per zone; if that succeeds, it prefetches pages from swap into the swap
cache. The pages are added to the tail of the inactive list to preserve LRU
ordering.

Pages are prefetched until the list is empty or the vm is seen as busy
according to the previously described criteria. On numa, node data is stored
with the entries, and an appropriate zonelist based on it is used when
allocating ram.

The pages are copied to swap cache and kept on backing store. This allows
pressure on either physical ram or swap to readily find free pages without
further I/O.

Prefetching can be enabled/disabled via the /proc/sys/vm/swap_prefetch
tunable, initially set to 1 (enabled).

Enabling laptop_mode disables swap prefetching to prevent unnecessary disk
spin-ups.

In testing on modern pc hardware, this speeds up wall-clock activation of
the firefox browser 5-fold after a worst-case complete swap-out of the
browser on a static web page.

From: Ingo Molnar <mingo@elte.hu>

Fix potential swap-prefetch deadlock, found by the locking correctness
validator.

Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>

 Documentation/sysctl/vm.txt   |   11
 include/linux/mm_inline.h     |    7
 include/linux/swap-prefetch.h |   55 +++
 include/linux/swap.h          |    2
 include/linux/sysctl.h        |    1
 init/Kconfig                  |   22 +
 kernel/sysctl.c               |   11
 mm/Makefile                   |    1
 mm/swap.c                     |   48 +++
 mm/swap_prefetch.c            |  579 ++++++++++++++++++++++++++++++++++++++++++
 mm/swap_state.c               |   11
 mm/vmscan.c                   |    6
 12 files changed, 753 insertions(+), 1 deletion(-)

Index: linux-ck-dev/Documentation/sysctl/vm.txt
===================================================================
--- linux-ck-dev.orig/Documentation/sysctl/vm.txt	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/Documentation/sysctl/vm.txt	2006-06-18 15:24:48.000000000 +1000
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/
 - drop-caches
 - zone_reclaim_mode
 - zone_reclaim_interval
+- swap_prefetch

 ==============================================================

@@ -178,3 +179,13 @@ Time is set in seconds and set by defaul
 Reduce the interval if undesired off node allocations occur. However, too
 frequent scans will have a negative impact onoff node allocation performance.

+==============================================================
+
+swap_prefetch
+
+This enables or disables the swap prefetching feature. When the virtual
+memory subsystem has been extremely idle for at least 5 seconds it will start
+copying back pages from swap into the swapcache and keep a copy in swap. In
+practice it can take many minutes before the vm is idle enough.
+
+The default value is 1.
Index: linux-ck-dev/include/linux/swap.h
===================================================================
--- linux-ck-dev.orig/include/linux/swap.h	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/include/linux/swap.h	2006-06-18 15:24:48.000000000 +1000
@@ -165,6 +165,7 @@ extern unsigned int nr_free_pagecache_pa
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
+extern void FASTCALL(lru_cache_add_tail(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
@@ -232,6 +233,7 @@ extern void free_pages_and_swap_cache(st
 extern struct page * lookup_swap_cache(swp_entry_t);
 extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
 					unsigned long addr);
+extern int add_to_swap_cache(struct page *page, swp_entry_t entry);
 /* linux/mm/swapfile.c */
 extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
Index: linux-ck-dev/include/linux/sysctl.h
===================================================================
--- linux-ck-dev.orig/include/linux/sysctl.h	2006-06-18 15:23:38.000000000 +1000
+++ linux-ck-dev/include/linux/sysctl.h	2006-06-18 15:24:48.000000000 +1000
@@ -189,6 +189,7 @@ enum
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_SWAP_PREFETCH=33,	/* swap prefetch */
 };


Index: linux-ck-dev/init/Kconfig
===================================================================
--- linux-ck-dev.orig/init/Kconfig	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/init/Kconfig	2006-06-18 15:24:48.000000000 +1000
@@ -92,6 +92,28 @@ config SWAP
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer. If unsure say Y.

+config SWAP_PREFETCH
+	bool "Support for prefetching swapped memory"
+	depends on SWAP
+	default y
+	---help---
+	  This option will allow the kernel to prefetch swapped memory pages
+	  when idle. The pages will be kept on both swap and in swap_cache
+	  thus avoiding the need for further I/O if either ram or swap space
+	  is required.
+
+	  What this will do on workstations is slowly bring applications that
+	  have been swapped out after memory intensive workloads back into
+	  physical ram if you have free ram at a later stage and the machine
+	  is relatively idle. This means that when you come back to your
+	  computer after leaving it idle for a while, applications will come
+	  to life faster. Note that your swap usage will appear to increase,
+	  but these are cached pages that can be dropped freely by the vm; it
+	  should stabilise around 50% swap usage maximum.
+
+	  Workstations and multiuser workstation servers will most likely want
+	  to say Y.
+
 config SYSVIPC
 	bool "System V IPC"
 	---help---
Index: linux-ck-dev/kernel/sysctl.c
===================================================================
--- linux-ck-dev.orig/kernel/sysctl.c	2006-06-18 15:23:38.000000000 +1000
+++ linux-ck-dev/kernel/sysctl.c	2006-06-18 15:24:48.000000000 +1000
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/swap-prefetch.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/capability.h>
@@ -941,6 +942,16 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_SWAP_PREFETCH
+	{
+		.ctl_name	= VM_SWAP_PREFETCH,
+		.procname	= "swap_prefetch",
+		.data		= &swap_prefetch,
+		.maxlen		= sizeof(swap_prefetch),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };

Index: linux-ck-dev/mm/Makefile
===================================================================
--- linux-ck-dev.orig/mm/Makefile	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/mm/Makefile	2006-06-18 15:24:48.000000000 +1000
@@ -13,6 +13,7 @@ obj-y := bootmem.o filemap.o mempool.o
 			   prio_tree.o util.o mmzone.o $(mmu-y)

 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP_PREFETCH) += swap_prefetch.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA)	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
Index: linux-ck-dev/mm/swap.c
===================================================================
--- linux-ck-dev.orig/mm/swap.c	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/mm/swap.c	2006-06-18 15:24:48.000000000 +1000
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -138,6 +139,7 @@ EXPORT_SYMBOL(mark_page_accessed);
  */
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
 static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_add_tail_pvecs) = { 0, };

 void fastcall lru_cache_add(struct page *page)
 {
@@ -159,6 +161,31 @@ void fastcall lru_cache_add_active(struc
 	put_cpu_var(lru_add_active_pvecs);
 }

+static void __pagevec_lru_add_tail(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+		add_page_to_inactive_list_tail(zone, page);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
 static void __lru_add_drain(int cpu)
 {
 	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -169,6 +196,9 @@ static void __lru_add_drain(int cpu)
 	pvec = &per_cpu(lru_add_active_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_active(pvec);
+	pvec = &per_cpu(lru_add_tail_pvecs, cpu);
+	if (pagevec_count(pvec))
+		__pagevec_lru_add_tail(pvec);
 }

 void lru_add_drain(void)
@@ -385,6 +415,21 @@ void __pagevec_lru_add_active(struct pag
 }

 /*
+ * Function used uniquely to put pages back to the lru at the end of the
+ * inactive list to preserve the lru order. Currently only used by swap
+ * prefetch.
+ */
+void fastcall lru_cache_add_tail(struct page *page)
+{
+	struct pagevec *pvec = &get_cpu_var(lru_add_tail_pvecs);
+
+	page_cache_get(page);
+	if (!pagevec_add(pvec, page))
+		__pagevec_lru_add_tail(pvec);
+	put_cpu_var(lru_add_tail_pvecs);
+}
+
+/*
  * Try to drop buffers from the pages in a pagevec
  */
 void pagevec_strip(struct pagevec *pvec)
@@ -538,5 +583,8 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	prepare_swap_prefetch();
+
 	hotcpu_notifier(cpu_swap_callback, 0);
 }
Index: linux-ck-dev/mm/swap_prefetch.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-ck-dev/mm/swap_prefetch.c	2006-06-18 15:24:48.000000000 +1000
@@ -0,0 +1,579 @@
+/*
+ * linux/mm/swap_prefetch.c
+ *
+ * Copyright (C) 2005-2006 Con Kolivas
+ *
+ * Written by Con Kolivas <kernel@kolivas.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swap-prefetch.h>
+#include <linux/ioprio.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/syscalls.h>
+#include <linux/writeback.h>
+
+/*
+ * Time to delay prefetching if vm is busy or prefetching unsuccessful. There
+ * needs to be at least this duration of idle time meaning in practice it can
+ * be much longer
+ */
+#define PREFETCH_DELAY	(HZ * 5)
+
+/* sysctl - enable/disable swap prefetching */
+int swap_prefetch __read_mostly = 1;
+
+struct swapped_root {
+	unsigned long		busy;		/* vm busy */
+	spinlock_t		lock;		/* protects all data */
+	struct list_head	list;		/* MRU list of swapped pages */
+	struct radix_tree_root	swap_tree;	/* Lookup tree of pages */
+	unsigned int		count;		/* Number of entries */
+	unsigned int		maxcount;	/* Maximum entries allowed */
+	kmem_cache_t		*cache;		/* Of struct swapped_entry */
+};
+
+static struct swapped_root swapped = {
+	.lock		= SPIN_LOCK_UNLOCKED,
+	.list		= LIST_HEAD_INIT(swapped.list),
+	.swap_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
+};
+
+static task_t *kprefetchd_task;
+
+/*
+ * We check to see no part of the vm is busy. If it is this will interrupt
+ * trickle_swap and wait another PREFETCH_DELAY. Purposefully racy.
+ */
+inline void delay_swap_prefetch(void)
+{
+	if (!test_bit(0, &swapped.busy))
+		__set_bit(0, &swapped.busy);
+}
+
+/*
+ * Drop behind accounting which keeps a list of the most recently used swap
+ * entries.
+ */
+void add_to_swapped_list(struct page *page)
+{
+	struct swapped_entry *entry;
+	unsigned long index, flags;
+	int wakeup;
+
+	if (!swap_prefetch)
+		return;
+
+	wakeup = 0;
+
+	spin_lock_irqsave(&swapped.lock, flags);
+	if (swapped.count >= swapped.maxcount) {
+		/*
+		 * We limit the number of entries to 2/3 of physical ram.
+		 * Once the number of entries exceeds this we start removing
+		 * the least recently used entries.
+		 */
+		entry = list_entry(swapped.list.next,
+			struct swapped_entry, swapped_list);
+		radix_tree_delete(&swapped.swap_tree, entry->swp_entry.val);
+		list_del(&entry->swapped_list);
+		swapped.count--;
+	} else {
+		entry = kmem_cache_alloc(swapped.cache, GFP_ATOMIC);
+		if (unlikely(!entry))
+			/* bad, can't allocate more mem */
+			goto out_locked;
+	}
+
+	index = page_private(page);
+	entry->swp_entry.val = index;
+	/*
+	 * On numa we need to store the node id to ensure that we prefetch to
+	 * the same node it came from.
+	 */
+	store_swap_entry_node(entry, page);
+
+	if (likely(!radix_tree_insert(&swapped.swap_tree, index, entry))) {
+		/*
+		 * If this is the first entry, kprefetchd needs to be
+		 * (re)started.
+		 */
+		if (!swapped.count)
+			wakeup = 1;
+		list_add(&entry->swapped_list, &swapped.list);
+		swapped.count++;
+	}
+
+out_locked:
+	spin_unlock_irqrestore(&swapped.lock, flags);

+	/* Do the wakeup outside the lock to shorten lock hold time. */
+	if (wakeup)
+		wake_up_process(kprefetchd_task);
+
+	return;
+}
+
+/*
+ * Removes entries from the swapped_list. The radix tree allows us to quickly
+ * look up the entry from the index without having to iterate over the whole
+ * list.
+ */
+void remove_from_swapped_list(const unsigned long index)
+{
+	struct swapped_entry *entry;
+	unsigned long flags;
+
+	if (list_empty(&swapped.list))
+		return;
+
+	spin_lock_irqsave(&swapped.lock, flags);
+	entry = radix_tree_delete(&swapped.swap_tree, index);
+	if (likely(entry)) {
+		list_del_init(&entry->swapped_list);
+		swapped.count--;
+		kmem_cache_free(swapped.cache, entry);
+	}
+	spin_unlock_irqrestore(&swapped.lock, flags);
+}
+
+enum trickle_return {
+	TRICKLE_SUCCESS,
+	TRICKLE_FAILED,
+	TRICKLE_DELAY,
+};
+
+struct node_stats {
+	unsigned long	last_free;
+	/* Free ram after a cycle of prefetching */
+	unsigned long	current_free;
+	/* Free ram on this cycle of checking prefetch_suitable */
+	unsigned long	prefetch_watermark;
+	/* Maximum amount we will prefetch to */
+	unsigned long	highfree[MAX_NR_ZONES];
+	/* The amount of free ram before we start prefetching */
+	unsigned long	lowfree[MAX_NR_ZONES];
+	/* The amount of free ram where we will stop prefetching */
+	unsigned long	*pointfree[MAX_NR_ZONES];
+	/* highfree or lowfree depending on whether we've hit a watermark */
+};
+
+/*
+ * prefetch_stats stores the free ram data of each node and this is used to
+ * determine if a node is suitable for prefetching into.
+ */
+struct prefetch_stats {
+	nodemask_t	prefetch_nodes;
+	/* Which nodes are currently suited to prefetching */
+	unsigned long	prefetched_pages;
+	/* Total pages we've prefetched on this wakeup of kprefetchd */
+	struct node_stats node[MAX_NUMNODES];
+};
+
+static struct prefetch_stats sp_stat;
+
+/*
+ * This tries to read a swp_entry_t into swap cache for swap prefetching.
+ * If it returns TRICKLE_DELAY we should delay further prefetching.
+ */
+static enum trickle_return trickle_swap_cache_async(const swp_entry_t entry,
+	const int node)
+{
+	enum trickle_return ret = TRICKLE_FAILED;
+	struct page *page;
+
+	read_lock_irq(&swapper_space.tree_lock);
+	/* Entry may already exist */
+	page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
+	read_unlock_irq(&swapper_space.tree_lock);
+	if (page) {
+		remove_from_swapped_list(entry.val);
+		goto out;
+	}
+
+	/*
+	 * Get a new page to read from swap. We have already checked the
+	 * watermarks so __alloc_pages will not call on reclaim.
+	 */
+	page = alloc_pages_node(node, GFP_HIGHUSER & ~__GFP_WAIT, 0);
+	if (unlikely(!page)) {
+		ret = TRICKLE_DELAY;
+		goto out;
+	}
+
+	if (add_to_swap_cache(page, entry)) {
+		/* Failed to add to swap cache */
+		goto out_release;
+	}
+
+	/* Add them to the tail of the inactive list to preserve LRU order */
+	lru_cache_add_tail(page);
+	if (unlikely(swap_readpage(NULL, page))) {
+		ret = TRICKLE_DELAY;
+		goto out_release;
+	}
+
+	sp_stat.prefetched_pages++;
+	sp_stat.node[node].last_free--;
+
+	ret = TRICKLE_SUCCESS;
+out_release:
+	page_cache_release(page);
+out:
+	return ret;
+}
+
+static void clear_last_prefetch_free(void)
+{
+	int node;
+
+	/*
+	 * Reset the nodes suitable for prefetching to all nodes. We could
+	 * update the data to take into account memory hotplug if desired..
+	 */
+	sp_stat.prefetch_nodes = node_online_map;
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+
+		ns->last_free = 0;
+	}
+}
+
+static void clear_current_prefetch_free(void)
+{
+	int node;
+
+	sp_stat.prefetch_nodes = node_online_map;
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+
+		ns->current_free = 0;
+	}
+}
+
+/*
+ * This updates the high and low watermarks of amount of free ram in each
+ * node used to start and stop prefetching. We prefetch from pages_high * 4
+ * down to pages_high * 3.
+ */
+static void examine_free_limits(void)
+{
+	struct zone *z;
+
+	for_each_zone(z) {
+		struct node_stats *ns;
+		int idx;
+
+		if (!populated_zone(z))
+			continue;
+
+		ns = &sp_stat.node[z->zone_pgdat->node_id];
+		idx = zone_idx(z);
+		ns->lowfree[idx] = z->pages_high * 3 +
+			z->lowmem_reserve[ZONE_HIGHMEM];
+		ns->highfree[idx] = ns->lowfree[idx] + z->pages_high;
+
+		if (z->free_pages > ns->highfree[idx]) {
+			/*
+			 * We've gotten above the high watermark of free pages
+			 * so we can start prefetching till we get to the low
+			 * watermark.
+			 */
+			ns->pointfree[idx] = &ns->lowfree[idx];
+		}
+	}
+}
+
+/*
+ * We want to be absolutely certain it's ok to start prefetching.
+ */
+static int prefetch_suitable(void)
+{
+	unsigned long limit;
+	struct zone *z;
+	int node, ret = 0, test_pagestate = 0;
+
+	/* Purposefully racy */
+	if (test_bit(0, &swapped.busy)) {
+		__clear_bit(0, &swapped.busy);
+		goto out;
+	}
+
+	/*
+	 * get_page_state and above_background_load are expensive so we only
+	 * perform them every SWAP_CLUSTER_MAX prefetched_pages.
+	 * We test to see if we're above_background_load as disk activity
+	 * even at low priority can cause interrupt induced scheduling
+	 * latencies.
+	 */
+	if (!(sp_stat.prefetched_pages % SWAP_CLUSTER_MAX)) {
+		if (above_background_load())
+			goto out;
+		test_pagestate = 1;
+	}
+
+	clear_current_prefetch_free();
+
+	/*
+	 * Have some hysteresis between where page reclaiming and prefetching
+	 * will occur to prevent ping-ponging between them.
+	 */
+	for_each_zone(z) {
+		struct node_stats *ns;
+		unsigned long free;
+		int idx;
+
+		if (!populated_zone(z))
+			continue;
+
+		node = z->zone_pgdat->node_id;
+		ns = &sp_stat.node[node];
+		idx = zone_idx(z);
+
+		free = z->free_pages;
+		if (free < *ns->pointfree[idx]) {
+			/*
+			 * Free pages have dropped below the low watermark so
+			 * we won't start prefetching again till we hit the
+			 * high watermark of free pages.
+			 */
+			ns->pointfree[idx] = &ns->highfree[idx];
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+		ns->current_free += free;
+	}
+
+	/*
+	 * We iterate over each node testing to see if it is suitable for
+	 * prefetching and clear the nodemask if it is not.
+	 */
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+		struct page_state ps;
+
+		/*
+		 * We check to see that pages are not being allocated
+		 * elsewhere at any significant rate implying any
+		 * degree of memory pressure (eg during file reads)
+		 */
+		if (ns->last_free) {
+			if (ns->current_free + SWAP_CLUSTER_MAX <
+			    ns->last_free) {
+				ns->last_free = ns->current_free;
+				node_clear(node,
+					sp_stat.prefetch_nodes);
+				continue;
+			}
+		} else
+			ns->last_free = ns->current_free;
+
+		if (!test_pagestate)
+			continue;
+
+		get_page_state_node(&ps, node);
+
+		/* We shouldn't prefetch when we are doing writeback */
+		if (ps.nr_writeback) {
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+
+		/*
+		 * >2/3 of the ram on this node is mapped, slab, swapcache or
+		 * dirty, we need to leave some free for pagecache.
+		 * Note that currently nr_slab is inaccurate on numa because
+		 * nr_slab is incremented on the node doing the accounting
+		 * even if the slab is being allocated on a remote node. This
+		 * would be expensive to fix and not of great significance.
+		 */
+		limit = ps.nr_mapped + ps.nr_slab + ps.nr_dirty +
+			ps.nr_unstable + total_swapcache_pages;
+		if (limit > ns->prefetch_watermark) {
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+	}
+
+	if (nodes_empty(sp_stat.prefetch_nodes))
+		goto out;
+
+	/* Survived all that? Hooray we can prefetch! */
+	ret = 1;
+out:
+	return ret;
+}
+
+/*
+ * Get previous swapped entry when iterating over all entries. swapped.lock
+ * should be held and we should already ensure that entry exists.
+ */
+static inline struct swapped_entry *prev_swapped_entry
+	(struct swapped_entry *entry)
+{
+	return list_entry(entry->swapped_list.prev->prev,
+		struct swapped_entry, swapped_list);
+}
+
+/*
+ * trickle_swap is the main function that initiates the swap prefetching. It
+ * first checks to see if the busy flag is set, and does not prefetch if it
+ * is, as the flag implies we are low on memory or currently swapping in.
+ * Otherwise it runs until prefetch_suitable fails, which occurs when the
+ * vm is busy, we have prefetched to the watermark, the list is empty, or we
+ * have iterated over all entries.
+ */
+static enum trickle_return trickle_swap(void)
+{
+	enum trickle_return ret = TRICKLE_DELAY;
+	struct swapped_entry *entry;
+	unsigned long flags;
+
+	/*
+	 * If laptop_mode is enabled don't prefetch to avoid hard drives
+	 * doing unnecessary spin-ups
+	 */
+	if (!swap_prefetch || laptop_mode)
+		return ret;
+
+	examine_free_limits();
+	entry = NULL;
+
+	for ( ; ; ) {
+		swp_entry_t swp_entry;
+		int node;
+
+		if (!prefetch_suitable())
+			break;
+
+		spin_lock_irqsave(&swapped.lock, flags);
+		if (list_empty(&swapped.list)) {
+			ret = TRICKLE_FAILED;
+			spin_unlock_irqrestore(&swapped.lock, flags);
+			break;
+		}
+
+		if (!entry) {
+			/*
+			 * This sets the entry for the first iteration. It
+			 * also is a safeguard against the entry disappearing
+			 * while the lock is not held.
+			 */
+			entry = list_entry(swapped.list.prev,
+				struct swapped_entry, swapped_list);
+		} else if (entry->swapped_list.prev == swapped.list.next) {
+			/*
+			 * If we have iterated over all entries and there are
+			 * still entries that weren't swapped out there may
+			 * be a reason we could not swap them back in so
+			 * delay attempting further prefetching.
+			 */
+			spin_unlock_irqrestore(&swapped.lock, flags);
+			break;
+		}
+
+		node = get_swap_entry_node(entry);
+		if (!node_isset(node, sp_stat.prefetch_nodes)) {
+			/*
+			 * We found an entry that belongs to a node that is
+			 * not suitable for prefetching so skip it.
+			 */
+			entry = prev_swapped_entry(entry);
+			spin_unlock_irqrestore(&swapped.lock, flags);
+			continue;
+		}
+		swp_entry = entry->swp_entry;
+		entry = prev_swapped_entry(entry);
+		spin_unlock_irqrestore(&swapped.lock, flags);
+
+		if (trickle_swap_cache_async(swp_entry, node) == TRICKLE_DELAY)
+			break;
+	}
+
+	if (sp_stat.prefetched_pages) {
+		lru_add_drain();
+		sp_stat.prefetched_pages = 0;
+	}
+	return ret;
+}
+
+static int kprefetchd(void *__unused)
+{
+	struct sched_param param = { .sched_priority = 0 };
+
+	sched_setscheduler(current, SCHED_BATCH, &param);
+	set_user_nice(current, 19);
+	/* Set ioprio to lowest if supported by i/o scheduler */
+	sys_ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_CLASS_IDLE);
+
+	do {
+		try_to_freeze();
+
+		/*
+		 * TRICKLE_FAILED implies no entries left - we do not schedule
+		 * a wakeup, and further delay the next one.
+		 */
+		if (trickle_swap() == TRICKLE_FAILED) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		}
+		clear_last_prefetch_free();
+		schedule_timeout_interruptible(PREFETCH_DELAY);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/*
+ * Create kmem cache for swapped entries
+ */
+void __init prepare_swap_prefetch(void)
+{
+	struct zone *zone;
+
+	swapped.cache = kmem_cache_create("swapped_entry",
+		sizeof(struct swapped_entry), 0, SLAB_PANIC, NULL, NULL);
+
+	/*
+	 * Set max number of entries to 2/3 the size of physical ram as we
+	 * only ever prefetch to consume 2/3 of the ram.
+	 */
+	swapped.maxcount = nr_free_pagecache_pages() / 3 * 2;
+
+	for_each_zone(zone) {
+		unsigned long present;
+		struct node_stats *ns;
+		int idx;
+
+		present = zone->present_pages;
+		if (!present)
+			continue;
+
+		ns = &sp_stat.node[zone->zone_pgdat->node_id];
+		ns->prefetch_watermark += present / 3 * 2;
+		idx = zone_idx(zone);
+		ns->pointfree[idx] = &ns->highfree[idx];
+	}
+}
+
+static int __init kprefetchd_init(void)
+{
+	kprefetchd_task = kthread_run(kprefetchd, NULL, "kprefetchd");
+
+	return 0;
+}
+
+static void __exit kprefetchd_exit(void)
+{
+	kthread_stop(kprefetchd_task);
+}
+
+module_init(kprefetchd_init);
+module_exit(kprefetchd_exit);
Index: linux-ck-dev/mm/swap_state.c
===================================================================
--- linux-ck-dev.orig/mm/swap_state.c	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/mm/swap_state.c	2006-06-18 15:24:48.000000000 +1000
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
@@ -82,6 +83,7 @@ static int __add_to_swap_cache(struct pa
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
 		if (!error) {
+			remove_from_swapped_list(entry.val);
 			page_cache_get(page);
 			SetPageLocked(page);
 			SetPageSwapCache(page);
@@ -95,11 +97,12 @@ static int __add_to_swap_cache(struct pa
 	return error;
 }

-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
+int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;

 	if (!swap_duplicate(entry)) {
+		remove_from_swapped_list(entry.val);
 		INC_CACHE_INFO(noent_race);
 		return -ENOENT;
 	}
@@ -148,6 +151,9 @@ int add_to_swap(struct page * page, gfp_
 	swp_entry_t entry;
 	int err;

+	/* Swap prefetching is delayed if we're swapping pages */
+	delay_swap_prefetch();
+
 	BUG_ON(!PageLocked(page));

 	for (;;) {
@@ -320,6 +326,9 @@ struct page *read_swap_cache_async(swp_e
 	struct page *found_page, *new_page = NULL;
 	int err;

+	/* Swap prefetching is delayed if we're already reading from swap */
+	delay_swap_prefetch();
+
 	do {
 		/*
 		 * First check the swap cache. Since this is normally
Index: linux-ck-dev/mm/vmscan.c
===================================================================
--- linux-ck-dev.orig/mm/vmscan.c	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/mm/vmscan.c	2006-06-18 15:24:48.000000000 +1000
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -380,6 +381,7 @@ int remove_mapping(struct address_space

 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
+		add_to_swapped_list(page);
 		__delete_from_swap_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
@@ -959,6 +961,8 @@ unsigned long try_to_free_pages(struct z
 		.may_swap = 1,
 	};

+	delay_swap_prefetch();
+
 	inc_page_state(allocstall);

 	for (i = 0; zones[i] != NULL; i++) {
@@ -1303,6 +1307,8 @@ unsigned long shrink_all_memory(unsigned
 		.reclaimed_slab = 0,
 	};

+	delay_swap_prefetch();
+
 	current->reclaim_state = &reclaim_state;
 repeat:
 	for_each_online_pgdat(pgdat) {
Index: linux-ck-dev/include/linux/mm_inline.h
===================================================================
--- linux-ck-dev.orig/include/linux/mm_inline.h	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/include/linux/mm_inline.h	2006-06-18 15:24:48.000000000 +1000
@@ -14,6 +14,13 @@ add_page_to_inactive_list(struct zone *z
 }

 static inline void
+add_page_to_inactive_list_tail(struct zone *zone, struct page *page)
+{
+	list_add_tail(&page->lru, &zone->inactive_list);
+	zone->nr_inactive++;
+}
+
+static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
 	list_del(&page->lru);
Index: linux-ck-dev/include/linux/swap-prefetch.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-ck-dev/include/linux/swap-prefetch.h	2006-06-18 15:24:48.000000000 +1000
@@ -0,0 +1,55 @@
+#ifndef SWAP_PREFETCH_H_INCLUDED
+#define SWAP_PREFETCH_H_INCLUDED
+
+#ifdef CONFIG_SWAP_PREFETCH
+/* mm/swap_prefetch.c */
+extern int swap_prefetch;
+struct swapped_entry {
+	swp_entry_t		swp_entry;	/* The actual swap entry */
+	struct list_head	swapped_list;	/* Linked list of entries */
+#if MAX_NUMNODES > 1
+	int			node;		/* Node id */
+#endif
+} __attribute__((packed));
+
+static inline void store_swap_entry_node(struct swapped_entry *entry,
+	struct page *page)
+{
+#if MAX_NUMNODES > 1
+	entry->node = page_to_nid(page);
+#endif
+}
+
+static inline int get_swap_entry_node(struct swapped_entry *entry)
+{
+#if MAX_NUMNODES > 1
+	return entry->node;
+#else
+	return 0;
+#endif
+}
+
+extern void add_to_swapped_list(struct page *page);
+extern void remove_from_swapped_list(const unsigned long index);
+extern void delay_swap_prefetch(void);
+extern void prepare_swap_prefetch(void);
+
+#else	/* CONFIG_SWAP_PREFETCH */
+static inline void add_to_swapped_list(struct page *__unused)
+{
+}
+
+static inline void prepare_swap_prefetch(void)
+{
+}
+
+static inline void remove_from_swapped_list(const unsigned long __unused)
+{
+}
+
+static inline void delay_swap_prefetch(void)
+{
+}
+#endif	/* CONFIG_SWAP_PREFETCH */
+
+#endif	/* SWAP_PREFETCH_H_INCLUDED */