Contents of /trunk/kernel26-alx/patches-2.6.17-r5/0018-2.6.17-swsusp-rework-memory-shrinker-rev-2.patch
Revision 199
Fri May 18 11:04:36 2007 UTC by niro
File size: 11840 byte(s)
-import

From: "Rafael J. Wysocki" <rjw@sisk.pl>

Rework swsusp's memory shrinker in the following way:

- Simplify balance_pgdat() by removing all of the swsusp-related code
  from it.

- Make shrink_all_memory() use shrink_slab() and a new function,
  shrink_all_zones(), which calls shrink_active_list() and
  shrink_inactive_list() directly for each zone in a way that's optimized
  for suspend.

In shrink_all_memory() we try to free exactly as many pages as the caller
asks for, preferably in one shot, starting from easier targets.  If slab
caches are huge, they are most likely to have enough pages to reclaim.
The inactive lists are next (the zones with more inactive pages go first),
etc.
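
As a rough illustration of that ordering, here is a small stand-alone C
model of the slab-first loop; reclaim_slab_model() and all page counts are
invented for illustration and are not part of the patch:

    #include <stdio.h>

    /* Stand-alone model of the "easier targets first" ordering described
     * above: while slab caches hold at least as many pages as the LRU
     * lists, keep reclaiming from slab until the request is satisfied or
     * slab stops giving pages back. */
    static unsigned long reclaim_slab_model(unsigned long nr_slab)
    {
            return nr_slab / 4;     /* invented reclaim rate */
    }

    int main(void)
    {
            unsigned long nr_slab = 80000, lru_pages = 50000; /* invented */
            unsigned long nr_pages = 30000, ret = 0; /* caller's request */

            while (nr_slab >= lru_pages && ret < nr_pages) {
                    unsigned long freed = reclaim_slab_model(nr_slab);

                    if (!freed)
                            break;  /* nothing reclaimable left in slab */
                    ret += freed;
                    nr_slab -= freed;
            }
            printf("freed %lu of %lu requested pages from slab\n",
                   ret, nr_pages);
            return 0;
    }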

Each time it is called, shrink_all_memory() attempts to shrink the active
and inactive lists of each zone in 5 passes.  In the first pass, only the
inactive lists are taken into consideration.  In the next two passes the
active lists are also shrunk, but mapped pages are not reclaimed.  In the
last two passes the active and inactive lists are shrunk and mapped pages
are reclaimed as well.  The aim is to alter the reclaim logic so that the
best pages are kept in memory for resume, improving the responsiveness of
the resumed system.
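
In code form, the pass structure of the new shrink_all_memory() looks
roughly like this (condensed from the mm/vmscan.c hunk below; the slab
shrinking between priority levels and the congestion waits are omitted):

    /* pass 0 touches only the inactive lists (shrink_all_zones() skips
     * the active lists when pass == 0); passes 1 and 2 shrink the active
     * lists too but leave mapped pages alone; passes 3 and 4 do normal
     * reclaim, mapped pages included */
    for (pass = 0; pass < 5; pass++) {
            int prio;

            if (pass > 2) {
                    sc.may_swap = 1;        /* mapped pages may go now */
                    sc.swappiness = 100;
            }

            for (prio = DEF_PRIORITY; prio >= 0; prio--) {
                    ret += shrink_all_zones(nr_pages - ret, prio, pass, &sc);
                    if (ret >= nr_pages)
                            goto out;
            }
    }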

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 kernel/power/swsusp.c |   10 +-
 mm/vmscan.c           |  223 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 173 insertions(+), 60 deletions(-)

Index: linux-ck-dev/kernel/power/swsusp.c
===================================================================
--- linux-ck-dev.orig/kernel/power/swsusp.c	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/kernel/power/swsusp.c	2006-06-18 15:24:52.000000000 +1000
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struc
  */
 
 #define SHRINK_BITE 10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+        if (tmp > SHRINK_BITE)
+                tmp = SHRINK_BITE;
+        return shrink_all_memory(tmp);
+}
 
 int swsusp_shrink_memory(void)
 {
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
                         if (!is_highmem(zone))
                                 tmp -= zone->free_pages;
                 if (tmp > 0) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(tmp);
                         if (!tmp)
                                 return -ENOMEM;
                         pages += tmp;
                 } else if (size > image_size / PAGE_SIZE) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
                         pages += tmp;
                 }
                 printk("\b%c", p[i++%4]);
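
Taken together, the hunks above mean swsusp_shrink_memory() now requests
exactly the remaining deficit, and __shrink_memory() merely caps each
request at SHRINK_BITE pages.  A hypothetical walk-through with invented
page counts, assuming every request is satisfied in full:

    __shrink_memory(25000);  /* calls shrink_all_memory(10000), 15000 left */
    __shrink_memory(15000);  /* calls shrink_all_memory(10000),  5000 left */
    __shrink_memory(5000);   /* calls shrink_all_memory(5000), deficit gone */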
Index: linux-ck-dev/mm/vmscan.c
===================================================================
--- linux-ck-dev.orig/mm/vmscan.c	2006-06-18 15:24:48.000000000 +1000
+++ linux-ck-dev/mm/vmscan.c	2006-06-18 15:24:52.000000000 +1000
@@ -62,6 +62,8 @@ struct scan_control {
          * In this context, it doesn't matter that we scan the
          * whole list at once. */
         int swap_cluster_max;
+
+        int swappiness;
 };
 
 /*
@@ -743,7 +745,7 @@ static void shrink_active_list(unsigned
          * A 100% value of vm_swappiness overrides this algorithm
          * altogether.
          */
-        swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+        swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
         /*
          * Now use this metric to decide whether to start moving mapped
@@ -959,6 +961,7 @@ unsigned long try_to_free_pages(struct z
                 .may_writepage = !laptop_mode,
                 .swap_cluster_max = SWAP_CLUSTER_MAX,
                 .may_swap = 1,
+                .swappiness = vm_swappiness,
         };
 
         delay_swap_prefetch();
@@ -1025,10 +1028,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1046,10 +1045,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-                                int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-        unsigned long to_free = nr_pages;
         int all_zones_ok;
         int priority;
         int i;
@@ -1059,7 +1056,8 @@ static unsigned long balance_pgda
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
                 .may_swap = 1,
-                .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+                .swap_cluster_max = SWAP_CLUSTER_MAX,
+                .swappiness = vm_swappiness,
         };
 
 loop_again:
@@ -1086,31 +1084,26 @@ loop_again:
 
         all_zones_ok = 1;
 
-        if (nr_pages == 0) {
-                /*
-                 * Scan in the highmem->dma direction for the highest
-                 * zone which needs scanning
-                 */
-                for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-                        struct zone *zone = pgdat->node_zones + i;
+        /*
+         * Scan in the highmem->dma direction for the highest
+         * zone which needs scanning
+         */
+        for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+                struct zone *zone = pgdat->node_zones + i;
 
-                        if (!populated_zone(zone))
-                                continue;
+                if (!populated_zone(zone))
+                        continue;
 
-                        if (zone->all_unreclaimable &&
-                            priority != DEF_PRIORITY)
-                                continue;
-
-                        if (!zone_watermark_ok(zone, order,
-                                        zone->pages_high, 0, 0)) {
-                                end_zone = i;
-                                goto scan;
-                        }
+                if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                        continue;
+
+                if (!zone_watermark_ok(zone, order, zone->pages_high,
+                                       0, 0)) {
+                        end_zone = i;
+                        goto scan;
                 }
-                goto out;
-        } else {
-                end_zone = pgdat->nr_zones - 1;
         }
+        goto out;
 scan:
         for (i = 0; i <= end_zone; i++) {
                 struct zone *zone = pgdat->node_zones + i;
@@ -1137,11 +1130,9 @@ scan:
                 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                         continue;
 
-                if (nr_pages == 0) {    /* Not software suspend */
-                        if (!zone_watermark_ok(zone, order,
-                                        zone->pages_high, end_zone, 0))
-                                all_zones_ok = 0;
-                }
+                if (!zone_watermark_ok(zone, order, zone->pages_high,
+                                       end_zone, 0))
+                        all_zones_ok = 0;
                 zone->temp_priority = priority;
                 if (zone->prev_priority > priority)
                         zone->prev_priority = priority;
@@ -1166,8 +1157,6 @@ scan:
                     total_scanned > nr_reclaimed + nr_reclaimed / 2)
                         sc.may_writepage = 1;
                 }
-                if (nr_pages && to_free > nr_reclaimed)
-                        continue;       /* swsusp: need to do more work */
                 if (all_zones_ok)
                         break;          /* kswapd: all done */
                 /*
@@ -1183,7 +1172,7 @@ scan:
                  * matches the direct reclaim path behaviour in terms of impact
                  * on zone->*_priority.
                  */
-                if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+                if (nr_reclaimed >= SWAP_CLUSTER_MAX)
                         break;
         }
 out:
@@ -1265,7 +1254,7 @@ static int kswapd(void *p)
                 }
                 finish_wait(&pgdat->kswapd_wait, &wait);
 
-                balance_pgdat(pgdat, 0, order);
+                balance_pgdat(pgdat, order);
         }
         return 0;
 }
@@ -1294,37 +1283,154 @@ void wakeup_kswapd(struct zone *zone, in
 
 #ifdef CONFIG_PM
 /*
- * Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
+ * from LRU lists system-wide, for given pass and priority, and returns the
+ * number of reclaimed pages
+ *
+ * For pass > 3 we also try to shrink the LRU lists that contain a few pages
+ */
+unsigned long shrink_all_zones(unsigned long nr_pages, int prio, int pass,
+                                struct scan_control *sc)
+{
+        struct zone *zone;
+        unsigned long nr_to_scan, ret = 0;
+
+        for_each_zone(zone) {
+
+                if (!populated_zone(zone))
+                        continue;
+
+                if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+                        continue;
+
+                /* For pass = 0 we don't shrink the active list */
+                if (pass > 0) {
+                        zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+                        if (zone->nr_scan_active >= nr_pages || pass > 3) {
+                                zone->nr_scan_active = 0;
+                                nr_to_scan = min(nr_pages, zone->nr_active);
+                                shrink_active_list(nr_to_scan, zone, sc);
+                        }
+                }
+
+                zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+                if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
+                        zone->nr_scan_inactive = 0;
+                        nr_to_scan = min(nr_pages, zone->nr_inactive);
+                        ret += shrink_inactive_list(nr_to_scan, zone, sc);
+                        if (ret >= nr_pages)
+                                return ret;
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * freed pages.
+ *
+ * Rather than trying to age LRUs the aim is to preserve the overall
+ * LRU order by reclaiming preferentially
+ * inactive > active > active referenced > active mapped
 */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-        pg_data_t *pgdat;
-        unsigned long nr_to_free = nr_pages;
+        unsigned long lru_pages, nr_slab;
         unsigned long ret = 0;
-        unsigned retry = 2;
-        struct reclaim_state reclaim_state = {
-                .reclaimed_slab = 0,
+        int pass;
+        struct reclaim_state reclaim_state;
+        struct zone *zone;
+        struct scan_control sc = {
+                .gfp_mask = GFP_KERNEL,
+                .may_swap = 0,
+                .swap_cluster_max = nr_pages,
+                .may_writepage = 1,
+                .swappiness = vm_swappiness,
         };
 
-        delay_swap_prefetch();
-
         current->reclaim_state = &reclaim_state;
-repeat:
-        for_each_online_pgdat(pgdat) {
-                unsigned long freed;
 
-                freed = balance_pgdat(pgdat, nr_to_free, 0);
-                ret += freed;
-                nr_to_free -= freed;
-                if ((long)nr_to_free <= 0)
+        lru_pages = 0;
+        for_each_zone(zone)
+                lru_pages += zone->nr_active + zone->nr_inactive;
+
+        nr_slab = read_page_state(nr_slab);
+        /* If slab caches are huge, it's better to hit them first */
+        while (nr_slab >= lru_pages) {
+                reclaim_state.reclaimed_slab = 0;
+                shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                if (!reclaim_state.reclaimed_slab)
                         break;
+
+                ret += reclaim_state.reclaimed_slab;
+                if (ret >= nr_pages)
+                        goto out;
+
+                nr_slab -= reclaim_state.reclaimed_slab;
         }
-        if (retry-- && ret < nr_pages) {
-                blk_congestion_wait(WRITE, HZ/5);
-                goto repeat;
+
+        /*
+         * We try to shrink LRUs in 5 passes:
+         * 0 = Reclaim from inactive_list only
+         * 1 = Reclaim from active list but don't reclaim mapped
+         * 2 = 2nd pass of type 1
+         * 3 = Reclaim mapped (normal reclaim)
+         * 4 = 2nd pass of type 3
+         */
+        for (pass = 0; pass < 5; pass++) {
+                int prio;
+
+                /* Needed for shrinking slab caches later on */
+                if (!lru_pages)
+                        for_each_zone(zone) {
+                                lru_pages += zone->nr_active;
+                                lru_pages += zone->nr_inactive;
+                        }
+
+                /* Force reclaiming mapped pages in the passes #3 and #4 */
+                if (pass > 2) {
+                        sc.may_swap = 1;
+                        sc.swappiness = 100;
+                }
+
+                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
+                        unsigned long nr_to_scan = nr_pages - ret;
+
+                        sc.nr_mapped = read_page_state(nr_mapped);
+                        sc.nr_scanned = 0;
+
+                        ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
+                        if (ret >= nr_pages)
+                                goto out;
+
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                        if (ret >= nr_pages)
+                                goto out;
+
+                        if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+                                blk_congestion_wait(WRITE, HZ / 10);
+                }
+
+                lru_pages = 0;
         }
+
+        /*
+         * If ret = 0, we could not shrink LRUs, but there may be something
+         * in slab caches
+         */
+        if (!ret)
+                do {
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+
+out:
         current->reclaim_state = NULL;
+
         return ret;
 }
 #endif
@@ -1422,6 +1528,7 @@ static int __zone_reclaim(struct zone *z
                 .swap_cluster_max = max_t(unsigned long, nr_pages,
                                         SWAP_CLUSTER_MAX),
                 .gfp_mask = gfp_mask,
+                .swappiness = vm_swappiness,
         };
 
         disable_swap_token();