Magellan Linux

Contents of /trunk/kernel26-alx/patches-2.6.21-r14/0024-2.6.21-mm-swap-prefetch-35-38.patch



Revision 447
Tue Jan 22 17:55:52 2008 UTC by niro
File size: 12448 bytes
-2.6.21-alx-r14 - fixed some natsemi errors on wys terminals

---
Documentation/sysctl/vm.txt | 5 -
mm/page_io.c | 2
mm/swap_prefetch.c | 167 +++++++++++++++++++-------------------------
mm/swap_state.c | 2
mm/vmscan.c | 1
5 files changed, 79 insertions(+), 98 deletions(-)

Index: linux-2.6.21-ck2/mm/page_io.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/page_io.c 2007-05-14 19:49:18.000000000 +1000
+++ linux-2.6.21-ck2/mm/page_io.c 2007-05-14 19:49:57.000000000 +1000
@@ -17,6 +17,7 @@
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
+#include <linux/swap-prefetch.h>
#include <asm/pgtable.h>

static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -118,6 +119,7 @@ int swap_writepage(struct page *page, st
ret = -ENOMEM;
goto out;
}
+ add_to_swapped_list(page);
if (wbc->sync_mode == WB_SYNC_ALL)
rw |= (1 << BIO_RW_SYNC);
count_vm_event(PSWPOUT);
Index: linux-2.6.21-ck2/mm/swap_state.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/swap_state.c 2007-05-14 19:49:55.000000000 +1000
+++ linux-2.6.21-ck2/mm/swap_state.c 2007-05-14 19:49:57.000000000 +1000
@@ -83,7 +83,6 @@ static int __add_to_swap_cache(struct pa
error = radix_tree_insert(&swapper_space.page_tree,
entry.val, page);
if (!error) {
- remove_from_swapped_list(entry.val);
page_cache_get(page);
SetPageLocked(page);
SetPageSwapCache(page);
@@ -102,7 +101,6 @@ int add_to_swap_cache(struct page *page,
int error;

if (!swap_duplicate(entry)) {
- remove_from_swapped_list(entry.val);
INC_CACHE_INFO(noent_race);
return -ENOENT;
}
Index: linux-2.6.21-ck2/mm/vmscan.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/vmscan.c 2007-05-14 19:49:56.000000000 +1000
+++ linux-2.6.21-ck2/mm/vmscan.c 2007-05-14 19:49:57.000000000 +1000
@@ -427,7 +427,6 @@ int remove_mapping(struct address_space

if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
- add_to_swapped_list(page);
__delete_from_swap_cache(page);
write_unlock_irq(&mapping->tree_lock);
swap_free(swap);
Index: linux-2.6.21-ck2/mm/swap_prefetch.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/swap_prefetch.c 2007-05-14 19:49:55.000000000 +1000
+++ linux-2.6.21-ck2/mm/swap_prefetch.c 2007-05-14 19:49:57.000000000 +1000
@@ -27,7 +27,8 @@
* needs to be at least this duration of idle time meaning in practice it can
* be much longer
*/
-#define PREFETCH_DELAY (HZ * 5)
+#define PREFETCH_DELAY (HZ * 5)
+#define DISABLED_PREFETCH_DELAY (HZ * 60)

/* sysctl - enable/disable swap prefetching */
int swap_prefetch __read_mostly = 1;
@@ -61,19 +62,30 @@ inline void delay_swap_prefetch(void)
}

/*
+ * If laptop_mode is enabled don't prefetch to avoid hard drives
+ * doing unnecessary spin-ups unless swap_prefetch is explicitly
+ * set to a higher value.
+ */
+static inline int prefetch_enabled(void)
+{
+ if (swap_prefetch <= laptop_mode)
+ return 0;
+ return 1;
+}
+
+static int wakeup_kprefetchd;
+
+/*
* Drop behind accounting which keeps a list of the most recently used swap
- * entries.
+ * entries. Entries are removed lazily by kprefetchd.
*/
void add_to_swapped_list(struct page *page)
{
struct swapped_entry *entry;
unsigned long index, flags;
- int wakeup;
-
- if (!swap_prefetch)
- return;

- wakeup = 0;
+ if (!prefetch_enabled())
+ goto out;

spin_lock_irqsave(&swapped.lock, flags);
if (swapped.count >= swapped.maxcount) {
@@ -103,23 +115,16 @@ void add_to_swapped_list(struct page *pa
store_swap_entry_node(entry, page);

if (likely(!radix_tree_insert(&swapped.swap_tree, index, entry))) {
- /*
- * If this is the first entry, kprefetchd needs to be
- * (re)started.
- */
- if (!swapped.count)
- wakeup = 1;
list_add(&entry->swapped_list, &swapped.list);
swapped.count++;
- }
+ } else
+ kmem_cache_free(swapped.cache, entry);

out_locked:
spin_unlock_irqrestore(&swapped.lock, flags);
-
- /* Do the wakeup outside the lock to shorten lock hold time. */
- if (wakeup)
+out:
+ if (wakeup_kprefetchd)
wake_up_process(kprefetchd_task);
-
return;
}

@@ -139,7 +144,7 @@ void remove_from_swapped_list(const unsi
spin_lock_irqsave(&swapped.lock, flags);
entry = radix_tree_delete(&swapped.swap_tree, index);
if (likely(entry)) {
- list_del_init(&entry->swapped_list);
+ list_del(&entry->swapped_list);
swapped.count--;
kmem_cache_free(swapped.cache, entry);
}
@@ -153,18 +158,18 @@ enum trickle_return {
};

struct node_stats {
- unsigned long last_free;
/* Free ram after a cycle of prefetching */
- unsigned long current_free;
+ unsigned long last_free;
/* Free ram on this cycle of checking prefetch_suitable */
- unsigned long prefetch_watermark;
+ unsigned long current_free;
/* Maximum amount we will prefetch to */
- unsigned long highfree[MAX_NR_ZONES];
+ unsigned long prefetch_watermark;
/* The amount of free ram before we start prefetching */
- unsigned long lowfree[MAX_NR_ZONES];
+ unsigned long highfree[MAX_NR_ZONES];
/* The amount of free ram where we will stop prefetching */
- unsigned long *pointfree[MAX_NR_ZONES];
+ unsigned long lowfree[MAX_NR_ZONES];
/* highfree or lowfree depending on whether we've hit a watermark */
+ unsigned long *pointfree[MAX_NR_ZONES];
};

/*
@@ -172,10 +177,10 @@ struct node_stats {
* determine if a node is suitable for prefetching into.
*/
struct prefetch_stats {
- nodemask_t prefetch_nodes;
/* Which nodes are currently suited to prefetching */
- unsigned long prefetched_pages;
+ nodemask_t prefetch_nodes;
/* Total pages we've prefetched on this wakeup of kprefetchd */
+ unsigned long prefetched_pages;
struct node_stats node[MAX_NUMNODES];
};

@@ -189,16 +194,15 @@ static enum trickle_return trickle_swap_
const int node)
{
enum trickle_return ret = TRICKLE_FAILED;
+ unsigned long flags;
struct page *page;

- read_lock_irq(&swapper_space.tree_lock);
+ read_lock_irqsave(&swapper_space.tree_lock, flags);
/* Entry may already exist */
page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
- read_unlock_irq(&swapper_space.tree_lock);
- if (page) {
- remove_from_swapped_list(entry.val);
+ read_unlock_irqrestore(&swapper_space.tree_lock, flags);
+ if (page)
goto out;
- }

/*
* Get a new page to read from swap. We have already checked the
@@ -217,10 +221,8 @@ static enum trickle_return trickle_swap_

/* Add them to the tail of the inactive list to preserve LRU order */
lru_cache_add_tail(page);
- if (unlikely(swap_readpage(NULL, page))) {
- ret = TRICKLE_DELAY;
+ if (unlikely(swap_readpage(NULL, page)))
goto out_release;
- }

sp_stat.prefetched_pages++;
sp_stat.node[node].last_free--;
@@ -229,6 +231,12 @@ static enum trickle_return trickle_swap_
out_release:
page_cache_release(page);
out:
+ /*
+ * All entries are removed here lazily. This avoids the cost of
+ * remove_from_swapped_list during normal swapin. Thus there are
+ * usually many stale entries.
+ */
+ remove_from_swapped_list(entry.val);
return ret;
}

@@ -410,17 +418,6 @@ out:
}

/*
- * Get previous swapped entry when iterating over all entries. swapped.lock
- * should be held and we should already ensure that entry exists.
- */
-static inline struct swapped_entry *prev_swapped_entry
- (struct swapped_entry *entry)
-{
- return list_entry(entry->swapped_list.prev->prev,
- struct swapped_entry, swapped_list);
-}
-
-/*
* trickle_swap is the main function that initiates the swap prefetching. It
* first checks to see if the busy flag is set, and does not prefetch if it
* is, as the flag implied we are low on memory or swapping in currently.
@@ -431,70 +428,49 @@ static inline struct swapped_entry *prev
static enum trickle_return trickle_swap(void)
{
enum trickle_return ret = TRICKLE_DELAY;
- struct swapped_entry *entry;
+ struct swapped_entry *pos, *n;
unsigned long flags;

- /*
- * If laptop_mode is enabled don't prefetch to avoid hard drives
- * doing unnecessary spin-ups
- */
- if (!swap_prefetch || laptop_mode)
+ if (!prefetch_enabled())
return ret;

examine_free_limits();
- entry = NULL;
+ if (!prefetch_suitable())
+ return ret;
+ if (list_empty(&swapped.list))
+ return TRICKLE_FAILED;

- for ( ; ; ) {
+ spin_lock_irqsave(&swapped.lock, flags);
+ list_for_each_entry_safe_reverse(pos, n, &swapped.list, swapped_list) {
swp_entry_t swp_entry;
int node;

- if (!prefetch_suitable())
- break;
+ spin_unlock_irqrestore(&swapped.lock, flags);
+ /* Yield to anything else running */
+ if (cond_resched() || !prefetch_suitable())
+ goto out_unlocked;

spin_lock_irqsave(&swapped.lock, flags);
- if (list_empty(&swapped.list)) {
- ret = TRICKLE_FAILED;
- spin_unlock_irqrestore(&swapped.lock, flags);
- break;
- }
-
- if (!entry) {
- /*
- * This sets the entry for the first iteration. It
- * also is a safeguard against the entry disappearing
- * while the lock is not held.
- */
- entry = list_entry(swapped.list.prev,
- struct swapped_entry, swapped_list);
- } else if (entry->swapped_list.prev == swapped.list.next) {
- /*
- * If we have iterated over all entries and there are
- * still entries that weren't swapped out there may
- * be a reason we could not swap them back in so
- * delay attempting further prefetching.
- */
- spin_unlock_irqrestore(&swapped.lock, flags);
- break;
- }
-
- node = get_swap_entry_node(entry);
+ if (unlikely(!pos))
+ continue;
+ node = get_swap_entry_node(pos);
if (!node_isset(node, sp_stat.prefetch_nodes)) {
/*
* We found an entry that belongs to a node that is
* not suitable for prefetching so skip it.
*/
- entry = prev_swapped_entry(entry);
- spin_unlock_irqrestore(&swapped.lock, flags);
continue;
}
- swp_entry = entry->swp_entry;
- entry = prev_swapped_entry(entry);
+ swp_entry = pos->swp_entry;
spin_unlock_irqrestore(&swapped.lock, flags);

if (trickle_swap_cache_async(swp_entry, node) == TRICKLE_DELAY)
- break;
+ goto out_unlocked;
+ spin_lock_irqsave(&swapped.lock, flags);
}
+ spin_unlock_irqrestore(&swapped.lock, flags);

+out_unlocked:
if (sp_stat.prefetched_pages) {
lru_add_drain();
sp_stat.prefetched_pages = 0;
@@ -509,13 +485,14 @@ static int kprefetchd(void *__unused)
sched_setscheduler(current, SCHED_BATCH, &param);
set_user_nice(current, 19);
/* Set ioprio to lowest if supported by i/o scheduler */
- sys_ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_CLASS_IDLE);
+ sys_ioprio_set(IOPRIO_WHO_PROCESS, IOPRIO_BE_NR - 1, IOPRIO_CLASS_BE);

/* kprefetchd has nothing to do until it is woken up the first time */
+ wakeup_kprefetchd = 1;
set_current_state(TASK_INTERRUPTIBLE);
schedule();

- do {
+ while (!kthread_should_stop()) {
try_to_freeze();

/*
@@ -523,13 +500,17 @@ static int kprefetchd(void *__unused)
* a wakeup, and further delay the next one.
*/
if (trickle_swap() == TRICKLE_FAILED) {
+ wakeup_kprefetchd = 1;
set_current_state(TASK_INTERRUPTIBLE);
schedule();
- }
+ } else
+ wakeup_kprefetchd = 0;
clear_last_prefetch_free();
- schedule_timeout_interruptible(PREFETCH_DELAY);
- } while (!kthread_should_stop());
-
+ if (!prefetch_enabled())
+ schedule_timeout_interruptible(DISABLED_PREFETCH_DELAY);
+ else
+ schedule_timeout_interruptible(PREFETCH_DELAY);
+ }
return 0;
}

Index: linux-2.6.21-ck2/Documentation/sysctl/vm.txt
===================================================================
--- linux-2.6.21-ck2.orig/Documentation/sysctl/vm.txt 2007-05-14 19:49:56.000000000 +1000
+++ linux-2.6.21-ck2/Documentation/sysctl/vm.txt 2007-05-14 19:49:57.000000000 +1000
@@ -236,7 +236,8 @@ swap_prefetch
This enables or disables the swap prefetching feature. When the virtual
memory subsystem has been extremely idle for at least 5 seconds it will start
copying back pages from swap into the swapcache and keep a copy in swap. In
-practice it can take many minutes before the vm is idle enough.
+practice it can take many minutes before the vm is idle enough. A value of 0
+disables swap prefetching, 1 enables it unless laptop_mode is enabled, and 2
+enables it even in the presence of laptop_mode.

The default value is 1.
-
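
The behavioural core of this revision is the new prefetch_enabled() test, which reduces to swap_prefetch > laptop_mode. The snippet below is a standalone userspace sketch written for this page (it is not part of the patch) that enumerates how the documented sysctl values 0, 1 and 2 combine with laptop_mode, treating laptop_mode as a simple 0/1 flag for illustration:

/*
 * Illustration only: mirrors the prefetch_enabled() check added above.
 * swap_prefetch = 0 never prefetches, 1 prefetches unless laptop_mode
 * is set, and 2 prefetches even when laptop_mode is set.
 */
#include <stdio.h>

static int prefetch_enabled(int swap_prefetch, int laptop_mode)
{
	if (swap_prefetch <= laptop_mode)
		return 0;
	return 1;
}

int main(void)
{
	int sp, lm;

	for (sp = 0; sp <= 2; sp++)
		for (lm = 0; lm <= 1; lm++)
			printf("vm.swap_prefetch=%d laptop_mode=%d -> %s\n",
			       sp, lm,
			       prefetch_enabled(sp, lm) ? "prefetch" : "no prefetch");
	return 0;
}

At runtime the value can be changed through the sysctl documented in the vm.txt hunk above, for example by writing 0, 1 or 2 to /proc/sys/vm/swap_prefetch.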