/trunk/kernel26-alx/patches-2.6.17-r7/0018-2.6.17-swsusp-rework-memory-shrinker-rev-2.patch
Revision 199 - Fri May 18 11:04:36 2007 UTC by niro
File size: 11840 byte(s)
Log: -import

From: "Rafael J. Wysocki" <rjw@sisk.pl>

Rework swsusp's memory shrinker in the following way:

- Simplify balance_pgdat() by removing all of the swsusp-related code
  from it.

- Make shrink_all_memory() use shrink_slab() and a new function
  shrink_all_zones() which calls shrink_active_list() and
  shrink_inactive_list() directly for each zone in a way that's optimized
  for suspend (a simplified model of this per-zone loop follows the list).
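
As that simplified model: a minimal, compilable userspace sketch of the
per-zone scan which shrink_all_zones() introduces. struct zone_model and
the reclaim arithmetic are illustrative stand-ins, not the kernel's types
or the patch's code:

#include <stdio.h>

/* Illustrative stand-in for the two LRU counters of struct zone. */
struct zone_model {
	unsigned long nr_active;
	unsigned long nr_inactive;
};

/*
 * Model of one shrink_all_zones() visit to a single zone: pass 0
 * touches only the inactive list; later passes first rotate pages
 * from the active list to the inactive list (shrink_active_list()
 * itself frees nothing), then reclaim from the inactive list.
 */
static unsigned long scan_zone(struct zone_model *z,
			       unsigned long nr_pages, int pass)
{
	unsigned long moved, got = 0;

	if (pass > 0 && z->nr_active) {
		moved = z->nr_active < nr_pages ? z->nr_active : nr_pages;
		z->nr_active -= moved;		/* ~shrink_active_list() */
		z->nr_inactive += moved;
	}
	if (z->nr_inactive) {
		got = z->nr_inactive < nr_pages ? z->nr_inactive : nr_pages;
		z->nr_inactive -= got;		/* ~shrink_inactive_list() */
	}
	return got;
}

int main(void)
{
	struct zone_model zones[2] = { { 100, 40 }, { 10, 300 } };
	unsigned long want = 400, got = 0;
	int pass, i;

	for (pass = 0; pass < 5 && got < want; pass++)
		for (i = 0; i < 2 && got < want; i++)
			got += scan_zone(&zones[i], want - got, pass);

	printf("reclaimed %lu of %lu requested pages\n", got, want);
	return 0;
}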

In shrink_all_memory() we try to free exactly as many pages as the
caller asks for, preferably in one shot, starting from the easier
targets.  If the slab caches are huge, they are most likely to have
enough pages to reclaim, so they are hit first.  The inactive lists
come next (zones with more inactive pages go first), and so on.
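
A hedged sketch of that slab-first gate; slab_bite() is a made-up
stand-in for a single shrink_slab() call, not code from the patch:

/* Stand-in for one shrink_slab() call: frees up to 1000 pages. */
static unsigned long slab_bite(unsigned long *nr_slab)
{
	unsigned long freed = *nr_slab < 1000 ? *nr_slab : 1000;

	*nr_slab -= freed;
	return freed;
}

/* Keep biting at slab while it still outweighs the LRU lists,
 * stopping early once the request is met or progress stalls. */
static unsigned long slab_first(unsigned long nr_slab,
				unsigned long lru_pages,
				unsigned long nr_pages)
{
	unsigned long freed, ret = 0;

	while (nr_slab >= lru_pages && ret < nr_pages) {
		freed = slab_bite(&nr_slab);
		if (!freed)
			break;	/* no progress: fall through to the LRUs */
		ret += freed;
	}
	return ret;
}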

On each invocation, shrink_all_memory() attempts to shrink the active
and inactive lists of every zone in up to 5 passes.  In the first pass,
only the inactive lists are taken into consideration.  In the next two
passes the active lists are also shrunk, but mapped pages are not
reclaimed.  In the last two passes the active and inactive lists are
shrunk and mapped pages are reclaimed as well.  The aim of this is to
alter the reclaim logic so that the best pages are kept in memory across
suspend, improving the responsiveness of the resumed system.
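
Summarised as a reading aid (the field names are the ones the patch
adds to struct scan_control; the table itself is not from the patch):

/*
 *	pass	active list	mapped reclaim	sc.may_swap	sc.swappiness
 *	0	not scanned	no		0		vm_swappiness
 *	1	scanned		no		0		vm_swappiness
 *	2	scanned		no		0		vm_swappiness
 *	3	scanned		yes		1		100
 *	4	scanned		yes		1		100
 */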

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 kernel/power/swsusp.c |   10 +-
 mm/vmscan.c           |  223 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 173 insertions(+), 60 deletions(-)

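As a note before the hunks: the first one introduces __shrink_memory(),
which clamps each request to SHRINK_BITE pages so that no single call
into the reclaim path runs unbounded.  A small userspace model of that
clamp, with fake_reclaim() standing in for shrink_all_memory():

#include <stdio.h>

#define SHRINK_BITE 10000	/* same cap as the patch */

/* Stand-in for shrink_all_memory(): pretend every page asked for
 * is actually freed. */
static unsigned long fake_reclaim(long nr)
{
	return (unsigned long)nr;
}

/* Mirrors the patch's __shrink_memory(): clamp, then reclaim. */
static unsigned long shrink_memory_model(long tmp)
{
	if (tmp > SHRINK_BITE)
		tmp = SHRINK_BITE;
	return fake_reclaim(tmp);
}

int main(void)
{
	printf("%lu\n", shrink_memory_model(25000));	/* prints 10000 */
	printf("%lu\n", shrink_memory_model(4000));	/* prints 4000 */
	return 0;
}
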
Index: linux-ck-dev/kernel/power/swsusp.c
===================================================================
--- linux-ck-dev.orig/kernel/power/swsusp.c	2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/kernel/power/swsusp.c	2006-06-18 15:24:52.000000000 +1000
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struc
  */
 
 #define SHRINK_BITE 10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+	if (tmp > SHRINK_BITE)
+		tmp = SHRINK_BITE;
+	return shrink_all_memory(tmp);
+}
 
 int swsusp_shrink_memory(void)
 {
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
 			if (!is_highmem(zone))
 				tmp -= zone->free_pages;
 		if (tmp > 0) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(tmp);
 			if (!tmp)
 				return -ENOMEM;
 			pages += tmp;
 		} else if (size > image_size / PAGE_SIZE) {
-			tmp = shrink_all_memory(SHRINK_BITE);
+			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
 			pages += tmp;
 		}
 		printk("\b%c", p[i++%4]);
Index: linux-ck-dev/mm/vmscan.c
===================================================================
--- linux-ck-dev.orig/mm/vmscan.c	2006-06-18 15:24:48.000000000 +1000
+++ linux-ck-dev/mm/vmscan.c	2006-06-18 15:24:52.000000000 +1000
@@ -62,6 +62,8 @@ struct scan_control {
 	 * In this context, it doesn't matter that we scan the
 	 * whole list at once. */
 	int swap_cluster_max;
+
+	int swappiness;
 };
 
 /*
@@ -743,7 +745,7 @@ static void shrink_active_list(unsigned
 	 * A 100% value of vm_swappiness overrides this algorithm
 	 * altogether.
 	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
 	/*
 	 * Now use this metric to decide whether to start moving mapped
@@ -959,6 +961,7 @@ unsigned long try_to_free_pages(struct z
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
+		.swappiness = vm_swappiness,
 	};
 
 	delay_swap_prefetch();
@@ -1025,10 +1028,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies.  This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1046,10 +1045,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-				int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-	unsigned long to_free = nr_pages;
 	int all_zones_ok;
 	int priority;
 	int i;
@@ -1059,7 +1056,8 @@ static unsigned long balance_pgdat(pg_da
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 1,
-		.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.swappiness = vm_swappiness,
 	};
 
 loop_again:
@@ -1086,31 +1084,26 @@ loop_again:
 
 		all_zones_ok = 1;
 
-		if (nr_pages == 0) {
-			/*
-			 * Scan in the highmem->dma direction for the highest
-			 * zone which needs scanning
-			 */
-			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-				struct zone *zone = pgdat->node_zones + i;
+		/*
+		 * Scan in the highmem->dma direction for the highest
+		 * zone which needs scanning
+		 */
+		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+			struct zone *zone = pgdat->node_zones + i;
 
-				if (!populated_zone(zone))
-					continue;
+			if (!populated_zone(zone))
+				continue;
 
-				if (zone->all_unreclaimable &&
-				    priority != DEF_PRIORITY)
-					continue;
-
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
-					end_zone = i;
-					goto scan;
-				}
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+				continue;
+
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       0, 0)) {
+				end_zone = i;
+				goto scan;
 			}
-			goto out;
-		} else {
-			end_zone = pgdat->nr_zones - 1;
 		}
+		goto out;
 scan:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
@@ -1137,11 +1130,9 @@ scan:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (nr_pages == 0) {	/* Not software suspend */
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0))
-					all_zones_ok = 0;
-			}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       end_zone, 0))
+				all_zones_ok = 0;
 			zone->temp_priority = priority;
 			if (zone->prev_priority > priority)
 				zone->prev_priority = priority;
@@ -1166,8 +1157,6 @@ scan:
 			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
-		if (nr_pages && to_free > nr_reclaimed)
-			continue;	/* swsusp: need to do more work */
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
 		/*
@@ -1183,7 +1172,7 @@ scan:
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1265,7 +1254,7 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		balance_pgdat(pgdat, order);
 	}
 	return 0;
 }
@@ -1294,37 +1283,154 @@ void wakeup_kswapd(struct zone *zone, in
 
 #ifdef CONFIG_PM
 /*
- * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
- * pages.
+ * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
+ * from LRU lists system-wide, for the given pass and priority, and returns
+ * the number of reclaimed pages.
+ *
+ * For pass > 3 we also try to shrink the LRU lists that contain only a few pages
+ */
+unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
+			       int pass, struct scan_control *sc)
+{
+	struct zone *zone;
+	unsigned long nr_to_scan, ret = 0;
+
+	for_each_zone(zone) {
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+			continue;
+
+		/* For pass = 0 we don't shrink the active list */
+		if (pass > 0) {
+			zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+			if (zone->nr_scan_active >= nr_pages || pass > 3) {
+				zone->nr_scan_active = 0;
+				nr_to_scan = min(nr_pages, zone->nr_active);
+				shrink_active_list(nr_to_scan, zone, sc);
+			}
+		}
+
+		zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
+			zone->nr_scan_inactive = 0;
+			nr_to_scan = min(nr_pages, zone->nr_inactive);
+			ret += shrink_inactive_list(nr_to_scan, zone, sc);
+			if (ret >= nr_pages)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * freed pages.
+ *
+ * Rather than trying to age LRUs the aim is to preserve the overall
+ * LRU order by reclaiming preferentially
+ * inactive > active > active referenced > active mapped
  */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-	pg_data_t *pgdat;
-	unsigned long nr_to_free = nr_pages;
+	unsigned long lru_pages, nr_slab;
 	unsigned long ret = 0;
-	unsigned retry = 2;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
+	int pass;
+	struct reclaim_state reclaim_state;
+	struct zone *zone;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_swap = 0,
+		.swap_cluster_max = nr_pages,
+		.may_writepage = 1,
+		.swappiness = vm_swappiness,
 	};
 
-	delay_swap_prefetch();
-
 	current->reclaim_state = &reclaim_state;
-repeat:
-	for_each_online_pgdat(pgdat) {
-		unsigned long freed;
 
-		freed = balance_pgdat(pgdat, nr_to_free, 0);
-		ret += freed;
-		nr_to_free -= freed;
-		if ((long)nr_to_free <= 0)
+	lru_pages = 0;
+	for_each_zone(zone)
+		lru_pages += zone->nr_active + zone->nr_inactive;
+
+	nr_slab = read_page_state(nr_slab);
+	/* If slab caches are huge, it's better to hit them first */
+	while (nr_slab >= lru_pages) {
+		reclaim_state.reclaimed_slab = 0;
+		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+		if (!reclaim_state.reclaimed_slab)
 			break;
+
+		ret += reclaim_state.reclaimed_slab;
+		if (ret >= nr_pages)
+			goto out;
+
+		nr_slab -= reclaim_state.reclaimed_slab;
 	}
-	if (retry-- && ret < nr_pages) {
-		blk_congestion_wait(WRITE, HZ/5);
-		goto repeat;
+
+	/*
+	 * We try to shrink LRUs in 5 passes:
+	 * 0 = Reclaim from inactive_list only
+	 * 1 = Reclaim from active list but don't reclaim mapped
+	 * 2 = 2nd pass of type 1
+	 * 3 = Reclaim mapped (normal reclaim)
+	 * 4 = 2nd pass of type 3
+	 */
+	for (pass = 0; pass < 5; pass++) {
+		int prio;
+
+		/* Needed for shrinking slab caches later on */
+		if (!lru_pages)
+			for_each_zone(zone) {
+				lru_pages += zone->nr_active;
+				lru_pages += zone->nr_inactive;
+			}
+
+		/* Force reclaiming mapped pages in the passes #3 and #4 */
+		if (pass > 2) {
+			sc.may_swap = 1;
+			sc.swappiness = 100;
+		}
+
+		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
+			unsigned long nr_to_scan = nr_pages - ret;
+
+			sc.nr_mapped = read_page_state(nr_mapped);
+			sc.nr_scanned = 0;
+
+			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
+			if (ret >= nr_pages)
+				goto out;
+
+			reclaim_state.reclaimed_slab = 0;
+			shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+			ret += reclaim_state.reclaimed_slab;
+			if (ret >= nr_pages)
+				goto out;
+
+			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+				blk_congestion_wait(WRITE, HZ / 10);
+		}
+
+		lru_pages = 0;
 	}
+
+	/*
+	 * If ret = 0, we could not shrink LRUs, but there may be something
+	 * in slab caches
+	 */
+	if (!ret)
+		do {
+			reclaim_state.reclaimed_slab = 0;
+			shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+			ret += reclaim_state.reclaimed_slab;
+		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+
+out:
 	current->reclaim_state = NULL;
+
 	return ret;
 }
 #endif
@@ -1422,6 +1528,7 @@ static int __zone_reclaim(struct zone *z
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,
+		.swappiness = vm_swappiness,
 	};
 
 	disable_swap_token();