Contents of /trunk/kernel26-alx/patches-2.6.21-r14/0024-2.6.21-mm-swap-prefetch-35-38.patch
Parent Directory | Revision Log
Revision 447 -
(show annotations)
(download)
Tue Jan 22 17:55:52 2008 UTC (16 years, 8 months ago) by niro
File size: 12448 byte(s)
-2.6.21-alx-r14 - fixed some natsemi errors on wys terminals
1 | --- |
2 | Documentation/sysctl/vm.txt | 5 - |
3 | mm/page_io.c | 2 |
4 | mm/swap_prefetch.c | 167 +++++++++++++++++++------------------------- |
5 | mm/swap_state.c | 2 |
6 | mm/vmscan.c | 1 |
7 | 5 files changed, 79 insertions(+), 98 deletions(-) |
8 | |
9 | Index: linux-2.6.21-ck2/mm/page_io.c |
10 | =================================================================== |
11 | --- linux-2.6.21-ck2.orig/mm/page_io.c 2007-05-14 19:49:18.000000000 +1000 |
12 | +++ linux-2.6.21-ck2/mm/page_io.c 2007-05-14 19:49:57.000000000 +1000 |
13 | @@ -17,6 +17,7 @@ |
14 | #include <linux/bio.h> |
15 | #include <linux/swapops.h> |
16 | #include <linux/writeback.h> |
17 | +#include <linux/swap-prefetch.h> |
18 | #include <asm/pgtable.h> |
19 | |
20 | static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index, |
21 | @@ -118,6 +119,7 @@ int swap_writepage(struct page *page, st |
22 | ret = -ENOMEM; |
23 | goto out; |
24 | } |
25 | + add_to_swapped_list(page); |
26 | if (wbc->sync_mode == WB_SYNC_ALL) |
27 | rw |= (1 << BIO_RW_SYNC); |
28 | count_vm_event(PSWPOUT); |
29 | Index: linux-2.6.21-ck2/mm/swap_state.c |
30 | =================================================================== |
31 | --- linux-2.6.21-ck2.orig/mm/swap_state.c 2007-05-14 19:49:55.000000000 +1000 |
32 | +++ linux-2.6.21-ck2/mm/swap_state.c 2007-05-14 19:49:57.000000000 +1000 |
33 | @@ -83,7 +83,6 @@ static int __add_to_swap_cache(struct pa |
34 | error = radix_tree_insert(&swapper_space.page_tree, |
35 | entry.val, page); |
36 | if (!error) { |
37 | - remove_from_swapped_list(entry.val); |
38 | page_cache_get(page); |
39 | SetPageLocked(page); |
40 | SetPageSwapCache(page); |
41 | @@ -102,7 +101,6 @@ int add_to_swap_cache(struct page *page, |
42 | int error; |
43 | |
44 | if (!swap_duplicate(entry)) { |
45 | - remove_from_swapped_list(entry.val); |
46 | INC_CACHE_INFO(noent_race); |
47 | return -ENOENT; |
48 | } |
49 | Index: linux-2.6.21-ck2/mm/vmscan.c |
50 | =================================================================== |
51 | --- linux-2.6.21-ck2.orig/mm/vmscan.c 2007-05-14 19:49:56.000000000 +1000 |
52 | +++ linux-2.6.21-ck2/mm/vmscan.c 2007-05-14 19:49:57.000000000 +1000 |
53 | @@ -427,7 +427,6 @@ int remove_mapping(struct address_space |
54 | |
55 | if (PageSwapCache(page)) { |
56 | swp_entry_t swap = { .val = page_private(page) }; |
57 | - add_to_swapped_list(page); |
58 | __delete_from_swap_cache(page); |
59 | write_unlock_irq(&mapping->tree_lock); |
60 | swap_free(swap); |
61 | Index: linux-2.6.21-ck2/mm/swap_prefetch.c |
62 | =================================================================== |
63 | --- linux-2.6.21-ck2.orig/mm/swap_prefetch.c 2007-05-14 19:49:55.000000000 +1000 |
64 | +++ linux-2.6.21-ck2/mm/swap_prefetch.c 2007-05-14 19:49:57.000000000 +1000 |
65 | @@ -27,7 +27,8 @@ |
66 | * needs to be at least this duration of idle time meaning in practice it can |
67 | * be much longer |
68 | */ |
69 | -#define PREFETCH_DELAY (HZ * 5) |
70 | +#define PREFETCH_DELAY (HZ * 5) |
71 | +#define DISABLED_PREFETCH_DELAY (HZ * 60) |
72 | |
73 | /* sysctl - enable/disable swap prefetching */ |
74 | int swap_prefetch __read_mostly = 1; |
75 | @@ -61,19 +62,30 @@ inline void delay_swap_prefetch(void) |
76 | } |
77 | |
78 | /* |
79 | + * If laptop_mode is enabled don't prefetch to avoid hard drives |
80 | + * doing unnecessary spin-ups unless swap_prefetch is explicitly |
81 | + * set to a higher value. |
82 | + */ |
83 | +static inline int prefetch_enabled(void) |
84 | +{ |
85 | + if (swap_prefetch <= laptop_mode) |
86 | + return 0; |
87 | + return 1; |
88 | +} |
89 | + |
90 | +static int wakeup_kprefetchd; |
91 | + |
92 | +/* |
93 | * Drop behind accounting which keeps a list of the most recently used swap |
94 | - * entries. |
95 | + * entries. Entries are removed lazily by kprefetchd. |
96 | */ |
97 | void add_to_swapped_list(struct page *page) |
98 | { |
99 | struct swapped_entry *entry; |
100 | unsigned long index, flags; |
101 | - int wakeup; |
102 | - |
103 | - if (!swap_prefetch) |
104 | - return; |
105 | |
106 | - wakeup = 0; |
107 | + if (!prefetch_enabled()) |
108 | + goto out; |
109 | |
110 | spin_lock_irqsave(&swapped.lock, flags); |
111 | if (swapped.count >= swapped.maxcount) { |
112 | @@ -103,23 +115,16 @@ void add_to_swapped_list(struct page *pa |
113 | store_swap_entry_node(entry, page); |
114 | |
115 | if (likely(!radix_tree_insert(&swapped.swap_tree, index, entry))) { |
116 | - /* |
117 | - * If this is the first entry, kprefetchd needs to be |
118 | - * (re)started. |
119 | - */ |
120 | - if (!swapped.count) |
121 | - wakeup = 1; |
122 | list_add(&entry->swapped_list, &swapped.list); |
123 | swapped.count++; |
124 | - } |
125 | + } else |
126 | + kmem_cache_free(swapped.cache, entry); |
127 | |
128 | out_locked: |
129 | spin_unlock_irqrestore(&swapped.lock, flags); |
130 | - |
131 | - /* Do the wakeup outside the lock to shorten lock hold time. */ |
132 | - if (wakeup) |
133 | +out: |
134 | + if (wakeup_kprefetchd) |
135 | wake_up_process(kprefetchd_task); |
136 | - |
137 | return; |
138 | } |
139 | |
140 | @@ -139,7 +144,7 @@ void remove_from_swapped_list(const unsi |
141 | spin_lock_irqsave(&swapped.lock, flags); |
142 | entry = radix_tree_delete(&swapped.swap_tree, index); |
143 | if (likely(entry)) { |
144 | - list_del_init(&entry->swapped_list); |
145 | + list_del(&entry->swapped_list); |
146 | swapped.count--; |
147 | kmem_cache_free(swapped.cache, entry); |
148 | } |
149 | @@ -153,18 +158,18 @@ enum trickle_return { |
150 | }; |
151 | |
152 | struct node_stats { |
153 | - unsigned long last_free; |
154 | /* Free ram after a cycle of prefetching */ |
155 | - unsigned long current_free; |
156 | + unsigned long last_free; |
157 | /* Free ram on this cycle of checking prefetch_suitable */ |
158 | - unsigned long prefetch_watermark; |
159 | + unsigned long current_free; |
160 | /* Maximum amount we will prefetch to */ |
161 | - unsigned long highfree[MAX_NR_ZONES]; |
162 | + unsigned long prefetch_watermark; |
163 | /* The amount of free ram before we start prefetching */ |
164 | - unsigned long lowfree[MAX_NR_ZONES]; |
165 | + unsigned long highfree[MAX_NR_ZONES]; |
166 | /* The amount of free ram where we will stop prefetching */ |
167 | - unsigned long *pointfree[MAX_NR_ZONES]; |
168 | + unsigned long lowfree[MAX_NR_ZONES]; |
169 | /* highfree or lowfree depending on whether we've hit a watermark */ |
170 | + unsigned long *pointfree[MAX_NR_ZONES]; |
171 | }; |
172 | |
173 | /* |
174 | @@ -172,10 +177,10 @@ struct node_stats { |
175 | * determine if a node is suitable for prefetching into. |
176 | */ |
177 | struct prefetch_stats { |
178 | - nodemask_t prefetch_nodes; |
179 | /* Which nodes are currently suited to prefetching */ |
180 | - unsigned long prefetched_pages; |
181 | + nodemask_t prefetch_nodes; |
182 | /* Total pages we've prefetched on this wakeup of kprefetchd */ |
183 | + unsigned long prefetched_pages; |
184 | struct node_stats node[MAX_NUMNODES]; |
185 | }; |
186 | |
187 | @@ -189,16 +194,15 @@ static enum trickle_return trickle_swap_ |
188 | const int node) |
189 | { |
190 | enum trickle_return ret = TRICKLE_FAILED; |
191 | + unsigned long flags; |
192 | struct page *page; |
193 | |
194 | - read_lock_irq(&swapper_space.tree_lock); |
195 | + read_lock_irqsave(&swapper_space.tree_lock, flags); |
196 | /* Entry may already exist */ |
197 | page = radix_tree_lookup(&swapper_space.page_tree, entry.val); |
198 | - read_unlock_irq(&swapper_space.tree_lock); |
199 | - if (page) { |
200 | - remove_from_swapped_list(entry.val); |
201 | + read_unlock_irqrestore(&swapper_space.tree_lock, flags); |
202 | + if (page) |
203 | goto out; |
204 | - } |
205 | |
206 | /* |
207 | * Get a new page to read from swap. We have already checked the |
208 | @@ -217,10 +221,8 @@ static enum trickle_return trickle_swap_ |
209 | |
210 | /* Add them to the tail of the inactive list to preserve LRU order */ |
211 | lru_cache_add_tail(page); |
212 | - if (unlikely(swap_readpage(NULL, page))) { |
213 | - ret = TRICKLE_DELAY; |
214 | + if (unlikely(swap_readpage(NULL, page))) |
215 | goto out_release; |
216 | - } |
217 | |
218 | sp_stat.prefetched_pages++; |
219 | sp_stat.node[node].last_free--; |
220 | @@ -229,6 +231,12 @@ static enum trickle_return trickle_swap_ |
221 | out_release: |
222 | page_cache_release(page); |
223 | out: |
224 | + /* |
225 | + * All entries are removed here lazily. This avoids the cost of |
226 | + * remove_from_swapped_list during normal swapin. Thus there are |
227 | + * usually many stale entries. |
228 | + */ |
229 | + remove_from_swapped_list(entry.val); |
230 | return ret; |
231 | } |
232 | |
233 | @@ -410,17 +418,6 @@ out: |
234 | } |
235 | |
236 | /* |
237 | - * Get previous swapped entry when iterating over all entries. swapped.lock |
238 | - * should be held and we should already ensure that entry exists. |
239 | - */ |
240 | -static inline struct swapped_entry *prev_swapped_entry |
241 | - (struct swapped_entry *entry) |
242 | -{ |
243 | - return list_entry(entry->swapped_list.prev->prev, |
244 | - struct swapped_entry, swapped_list); |
245 | -} |
246 | - |
247 | -/* |
248 | * trickle_swap is the main function that initiates the swap prefetching. It |
249 | * first checks to see if the busy flag is set, and does not prefetch if it |
250 | * is, as the flag implied we are low on memory or swapping in currently. |
251 | @@ -431,70 +428,49 @@ static inline struct swapped_entry *prev |
252 | static enum trickle_return trickle_swap(void) |
253 | { |
254 | enum trickle_return ret = TRICKLE_DELAY; |
255 | - struct swapped_entry *entry; |
256 | + struct swapped_entry *pos, *n; |
257 | unsigned long flags; |
258 | |
259 | - /* |
260 | - * If laptop_mode is enabled don't prefetch to avoid hard drives |
261 | - * doing unnecessary spin-ups |
262 | - */ |
263 | - if (!swap_prefetch || laptop_mode) |
264 | + if (!prefetch_enabled()) |
265 | return ret; |
266 | |
267 | examine_free_limits(); |
268 | - entry = NULL; |
269 | + if (!prefetch_suitable()) |
270 | + return ret; |
271 | + if (list_empty(&swapped.list)) |
272 | + return TRICKLE_FAILED; |
273 | |
274 | - for ( ; ; ) { |
275 | + spin_lock_irqsave(&swapped.lock, flags); |
276 | + list_for_each_entry_safe_reverse(pos, n, &swapped.list, swapped_list) { |
277 | swp_entry_t swp_entry; |
278 | int node; |
279 | |
280 | - if (!prefetch_suitable()) |
281 | - break; |
282 | + spin_unlock_irqrestore(&swapped.lock, flags); |
283 | + /* Yield to anything else running */ |
284 | + if (cond_resched() || !prefetch_suitable()) |
285 | + goto out_unlocked; |
286 | |
287 | spin_lock_irqsave(&swapped.lock, flags); |
288 | - if (list_empty(&swapped.list)) { |
289 | - ret = TRICKLE_FAILED; |
290 | - spin_unlock_irqrestore(&swapped.lock, flags); |
291 | - break; |
292 | - } |
293 | - |
294 | - if (!entry) { |
295 | - /* |
296 | - * This sets the entry for the first iteration. It |
297 | - * also is a safeguard against the entry disappearing |
298 | - * while the lock is not held. |
299 | - */ |
300 | - entry = list_entry(swapped.list.prev, |
301 | - struct swapped_entry, swapped_list); |
302 | - } else if (entry->swapped_list.prev == swapped.list.next) { |
303 | - /* |
304 | - * If we have iterated over all entries and there are |
305 | - * still entries that weren't swapped out there may |
306 | - * be a reason we could not swap them back in so |
307 | - * delay attempting further prefetching. |
308 | - */ |
309 | - spin_unlock_irqrestore(&swapped.lock, flags); |
310 | - break; |
311 | - } |
312 | - |
313 | - node = get_swap_entry_node(entry); |
314 | + if (unlikely(!pos)) |
315 | + continue; |
316 | + node = get_swap_entry_node(pos); |
317 | if (!node_isset(node, sp_stat.prefetch_nodes)) { |
318 | /* |
319 | * We found an entry that belongs to a node that is |
320 | * not suitable for prefetching so skip it. |
321 | */ |
322 | - entry = prev_swapped_entry(entry); |
323 | - spin_unlock_irqrestore(&swapped.lock, flags); |
324 | continue; |
325 | } |
326 | - swp_entry = entry->swp_entry; |
327 | - entry = prev_swapped_entry(entry); |
328 | + swp_entry = pos->swp_entry; |
329 | spin_unlock_irqrestore(&swapped.lock, flags); |
330 | |
331 | if (trickle_swap_cache_async(swp_entry, node) == TRICKLE_DELAY) |
332 | - break; |
333 | + goto out_unlocked; |
334 | + spin_lock_irqsave(&swapped.lock, flags); |
335 | } |
336 | + spin_unlock_irqrestore(&swapped.lock, flags); |
337 | |
338 | +out_unlocked: |
339 | if (sp_stat.prefetched_pages) { |
340 | lru_add_drain(); |
341 | sp_stat.prefetched_pages = 0; |
342 | @@ -509,13 +485,14 @@ static int kprefetchd(void *__unused) |
343 | sched_setscheduler(current, SCHED_BATCH, ¶m); |
344 | set_user_nice(current, 19); |
345 | /* Set ioprio to lowest if supported by i/o scheduler */ |
346 | - sys_ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_CLASS_IDLE); |
347 | + sys_ioprio_set(IOPRIO_WHO_PROCESS, IOPRIO_BE_NR - 1, IOPRIO_CLASS_BE); |
348 | |
349 | /* kprefetchd has nothing to do until it is woken up the first time */ |
350 | + wakeup_kprefetchd = 1; |
351 | set_current_state(TASK_INTERRUPTIBLE); |
352 | schedule(); |
353 | |
354 | - do { |
355 | + while (!kthread_should_stop()) { |
356 | try_to_freeze(); |
357 | |
358 | /* |
359 | @@ -523,13 +500,17 @@ static int kprefetchd(void *__unused) |
360 | * a wakeup, and further delay the next one. |
361 | */ |
362 | if (trickle_swap() == TRICKLE_FAILED) { |
363 | + wakeup_kprefetchd = 1; |
364 | set_current_state(TASK_INTERRUPTIBLE); |
365 | schedule(); |
366 | - } |
367 | + } else |
368 | + wakeup_kprefetchd = 0; |
369 | clear_last_prefetch_free(); |
370 | - schedule_timeout_interruptible(PREFETCH_DELAY); |
371 | - } while (!kthread_should_stop()); |
372 | - |
373 | + if (!prefetch_enabled()) |
374 | + schedule_timeout_interruptible(DISABLED_PREFETCH_DELAY); |
375 | + else |
376 | + schedule_timeout_interruptible(PREFETCH_DELAY); |
377 | + } |
378 | return 0; |
379 | } |
380 | |
381 | Index: linux-2.6.21-ck2/Documentation/sysctl/vm.txt |
382 | =================================================================== |
383 | --- linux-2.6.21-ck2.orig/Documentation/sysctl/vm.txt 2007-05-14 19:49:56.000000000 +1000 |
384 | +++ linux-2.6.21-ck2/Documentation/sysctl/vm.txt 2007-05-14 19:49:57.000000000 +1000 |
385 | @@ -236,7 +236,8 @@ swap_prefetch |
386 | This enables or disables the swap prefetching feature. When the virtual |
387 | memory subsystem has been extremely idle for at least 5 seconds it will start |
388 | copying back pages from swap into the swapcache and keep a copy in swap. In |
389 | -practice it can take many minutes before the vm is idle enough. |
390 | +practice it can take many minutes before the vm is idle enough. A value of 0 |
391 | +disables swap prefetching, 1 enables it unless laptop_mode is enabled, and 2 |
392 | +enables it even in the presence of laptop_mode. |
393 | |
394 | The default value is 1. |
395 | - |