Magellan Linux

Annotation of /trunk/kernel26-magellan/patches-2.6.17-r6/0018-2.6.17-swsusp-rework-memory-shrinker-rev-2.patch

Revision 105
Sun Mar 11 16:17:56 2007 UTC by niro
File size: 11840 byte(s)
2.6.17-magellan-r6

From: "Rafael J. Wysocki" <rjw@sisk.pl>

Rework swsusp's memory shrinker in the following way:

- Simplify balance_pgdat() by removing all of the swsusp-related code
  from it.

- Make shrink_all_memory() use shrink_slab() and a new function
  shrink_all_zones(), which calls shrink_active_list() and
  shrink_inactive_list() directly for each zone, in a way that is
  optimized for suspend.

In shrink_all_memory() we try to free exactly as many pages as the caller
asks for, preferably in one shot, starting from the easier targets.  If the
slab caches are huge, they are most likely to have enough pages to reclaim.
The inactive lists are next (the zones with more inactive pages go first),
and so on.

On each invocation, shrink_all_memory() attempts to shrink the active and
inactive lists of each zone in 5 passes.  In the first pass, only the
inactive lists are taken into consideration.  In the next two passes the
active lists are also shrunk, but mapped pages are not reclaimed.  In the
last two passes the active and inactive lists are shrunk and mapped pages
are reclaimed as well.  The aim of this is to alter the reclaim logic to
choose the best pages to keep on resume and to improve the responsiveness
of the resumed system.

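As a rough illustration of that ordering (not part of the patch itself), the
control flow can be sketched as follows.  The helpers count_slab_pages(),
count_lru_pages(), reclaim_slab() and reclaim_lru() are hypothetical
stand-ins for the real shrink_slab() and shrink_all_zones() calls made by
the new shrink_all_memory() in the diff below; the per-zone details,
congestion waits and page-state bookkeeping are omitted.

/*
 * Illustrative sketch only -- the four helpers declared below are
 * hypothetical placeholders, not functions added by this patch.
 */
#define DEF_PRIORITY    12

unsigned long count_slab_pages(void);   /* pages currently held by slab caches */
unsigned long count_lru_pages(void);    /* active + inactive LRU pages */
unsigned long reclaim_slab(unsigned long want);
unsigned long reclaim_lru(unsigned long want, int pass, int prio);

unsigned long shrink_all_memory_sketch(unsigned long nr_pages)
{
        unsigned long ret = 0;
        int pass, prio;

        /* If slab caches dominate the LRU, reclaim them first. */
        while (count_slab_pages() >= count_lru_pages() && ret < nr_pages) {
                unsigned long freed = reclaim_slab(nr_pages - ret);

                if (!freed)
                        break;
                ret += freed;
        }

        /*
         * Then shrink the LRU lists in 5 passes, from the easiest targets
         * to the hardest: pass 0 touches only the inactive lists, passes 1
         * and 2 add the active lists but skip mapped pages, and passes 3
         * and 4 reclaim mapped pages as well.
         */
        for (pass = 0; pass < 5 && ret < nr_pages; pass++)
                for (prio = DEF_PRIORITY; prio >= 0 && ret < nr_pages; prio--)
                        ret += reclaim_lru(nr_pages - ret, pass, prio);

        return ret;
}
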
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 kernel/power/swsusp.c |   10 +-
 mm/vmscan.c           |  223 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 173 insertions(+), 60 deletions(-)

Index: linux-ck-dev/kernel/power/swsusp.c
===================================================================
--- linux-ck-dev.orig/kernel/power/swsusp.c 2006-06-18 15:20:12.000000000 +1000
+++ linux-ck-dev/kernel/power/swsusp.c 2006-06-18 15:24:52.000000000 +1000
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struc
  */

 #define SHRINK_BITE 10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+        if (tmp > SHRINK_BITE)
+                tmp = SHRINK_BITE;
+        return shrink_all_memory(tmp);
+}

 int swsusp_shrink_memory(void)
 {
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
                         if (!is_highmem(zone))
                                 tmp -= zone->free_pages;
                 if (tmp > 0) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(tmp);
                         if (!tmp)
                                 return -ENOMEM;
                         pages += tmp;
                 } else if (size > image_size / PAGE_SIZE) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
                         pages += tmp;
                 }
                 printk("\b%c", p[i++%4]);
Index: linux-ck-dev/mm/vmscan.c
===================================================================
--- linux-ck-dev.orig/mm/vmscan.c 2006-06-18 15:24:48.000000000 +1000
+++ linux-ck-dev/mm/vmscan.c 2006-06-18 15:24:52.000000000 +1000
@@ -62,6 +62,8 @@ struct scan_control {
          * In this context, it doesn't matter that we scan the
          * whole list at once. */
         int swap_cluster_max;
+
+        int swappiness;
 };

 /*
@@ -743,7 +745,7 @@ static void shrink_active_list(unsigned
                  * A 100% value of vm_swappiness overrides this algorithm
                  * altogether.
                  */
-                swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+                swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;

                 /*
                  * Now use this metric to decide whether to start moving mapped
@@ -959,6 +961,7 @@ unsigned long try_to_free_pages(struct z
                 .may_writepage = !laptop_mode,
                 .swap_cluster_max = SWAP_CLUSTER_MAX,
                 .may_swap = 1,
+                .swappiness = vm_swappiness,
         };

         delay_swap_prefetch();
@@ -1025,10 +1028,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1046,10 +1045,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-                                int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-        unsigned long to_free = nr_pages;
         int all_zones_ok;
         int priority;
         int i;
@@ -1059,7 +1056,8 @@ static unsigned long balance_pgdat(pg_da
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
                 .may_swap = 1,
-                .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+                .swap_cluster_max = SWAP_CLUSTER_MAX,
+                .swappiness = vm_swappiness,
         };

 loop_again:
@@ -1086,31 +1084,26 @@ loop_again:

                 all_zones_ok = 1;

-                if (nr_pages == 0) {
-                        /*
-                         * Scan in the highmem->dma direction for the highest
-                         * zone which needs scanning
-                         */
-                        for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-                                struct zone *zone = pgdat->node_zones + i;
+                /*
+                 * Scan in the highmem->dma direction for the highest
+                 * zone which needs scanning
+                 */
+                for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+                        struct zone *zone = pgdat->node_zones + i;

-                                if (!populated_zone(zone))
-                                        continue;
+                        if (!populated_zone(zone))
+                                continue;

-                                if (zone->all_unreclaimable &&
-                                                priority != DEF_PRIORITY)
-                                        continue;
-
-                                if (!zone_watermark_ok(zone, order,
-                                                zone->pages_high, 0, 0)) {
-                                        end_zone = i;
-                                        goto scan;
-                                }
+                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                                continue;
+
+                        if (!zone_watermark_ok(zone, order, zone->pages_high,
+                                               0, 0)) {
+                                end_zone = i;
+                                goto scan;
                         }
-                        goto out;
-                } else {
-                        end_zone = pgdat->nr_zones - 1;
                 }
+                goto out;
 scan:
                 for (i = 0; i <= end_zone; i++) {
                         struct zone *zone = pgdat->node_zones + i;
@@ -1137,11 +1130,9 @@ scan:
                         if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                                 continue;

-                        if (nr_pages == 0) {    /* Not software suspend */
-                                if (!zone_watermark_ok(zone, order,
-                                                zone->pages_high, end_zone, 0))
-                                        all_zones_ok = 0;
-                        }
+                        if (!zone_watermark_ok(zone, order, zone->pages_high,
+                                               end_zone, 0))
+                                all_zones_ok = 0;
                         zone->temp_priority = priority;
                         if (zone->prev_priority > priority)
                                 zone->prev_priority = priority;
@@ -1166,8 +1157,6 @@ scan:
                             total_scanned > nr_reclaimed + nr_reclaimed / 2)
                                 sc.may_writepage = 1;
                 }
-                if (nr_pages && to_free > nr_reclaimed)
-                        continue;       /* swsusp: need to do more work */
                 if (all_zones_ok)
                         break;          /* kswapd: all done */
                 /*
@@ -1183,7 +1172,7 @@ scan:
                  * matches the direct reclaim path behaviour in terms of impact
                  * on zone->*_priority.
                  */
-                if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+                if (nr_reclaimed >= SWAP_CLUSTER_MAX)
                         break;
         }
 out:
@@ -1265,7 +1254,7 @@ static int kswapd(void *p)
                 }
                 finish_wait(&pgdat->kswapd_wait, &wait);

-                balance_pgdat(pgdat, 0, order);
+                balance_pgdat(pgdat, order);
         }
         return 0;
 }
@@ -1294,37 +1283,154 @@ void wakeup_kswapd(struct zone *zone, in

 #ifdef CONFIG_PM
 /*
- * Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
+ * from LRU lists system-wide, for given pass and priority, and returns the
+ * number of reclaimed pages
+ *
+ * For pass > 3 we also try to shrink the LRU lists that contain a few pages
+ */
+unsigned long shrink_all_zones(unsigned long nr_pages, int pass, int prio,
+                                struct scan_control *sc)
+{
+        struct zone *zone;
+        unsigned long nr_to_scan, ret = 0;
+
+        for_each_zone(zone) {
+
+                if (!populated_zone(zone))
+                        continue;
+
+                if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+                        continue;
+
+                /* For pass = 0 we don't shrink the active list */
+                if (pass > 0) {
+                        zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+                        if (zone->nr_scan_active >= nr_pages || pass > 3) {
+                                zone->nr_scan_active = 0;
+                                nr_to_scan = min(nr_pages, zone->nr_active);
+                                shrink_active_list(nr_to_scan, zone, sc);
+                        }
+                }
+
+                zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+                if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
+                        zone->nr_scan_inactive = 0;
+                        nr_to_scan = min(nr_pages, zone->nr_inactive);
+                        ret += shrink_inactive_list(nr_to_scan, zone, sc);
+                        if (ret >= nr_pages)
+                                return ret;
+                }
+        }
+
+        return ret;
+}
+
+/*
+ * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * freed pages.
+ *
+ * Rather than trying to age LRUs the aim is to preserve the overall
+ * LRU order by reclaiming preferentially
+ * inactive > active > active referenced > active mapped
  */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-        pg_data_t *pgdat;
-        unsigned long nr_to_free = nr_pages;
+        unsigned long lru_pages, nr_slab;
         unsigned long ret = 0;
-        unsigned retry = 2;
-        struct reclaim_state reclaim_state = {
-                .reclaimed_slab = 0,
+        int pass;
+        struct reclaim_state reclaim_state;
+        struct zone *zone;
+        struct scan_control sc = {
+                .gfp_mask = GFP_KERNEL,
+                .may_swap = 0,
+                .swap_cluster_max = nr_pages,
+                .may_writepage = 1,
+                .swappiness = vm_swappiness,
         };

-        delay_swap_prefetch();
-
         current->reclaim_state = &reclaim_state;
-repeat:
-        for_each_online_pgdat(pgdat) {
-                unsigned long freed;

-                freed = balance_pgdat(pgdat, nr_to_free, 0);
-                ret += freed;
-                nr_to_free -= freed;
-                if ((long)nr_to_free <= 0)
+        lru_pages = 0;
+        for_each_zone(zone)
+                lru_pages += zone->nr_active + zone->nr_inactive;
+
+        nr_slab = read_page_state(nr_slab);
+        /* If slab caches are huge, it's better to hit them first */
+        while (nr_slab >= lru_pages) {
+                reclaim_state.reclaimed_slab = 0;
+                shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                if (!reclaim_state.reclaimed_slab)
                         break;
+
+                ret += reclaim_state.reclaimed_slab;
+                if (ret >= nr_pages)
+                        goto out;
+
+                nr_slab -= reclaim_state.reclaimed_slab;
         }
-        if (retry-- && ret < nr_pages) {
-                blk_congestion_wait(WRITE, HZ/5);
-                goto repeat;
+
+        /*
+         * We try to shrink LRUs in 5 passes:
+         * 0 = Reclaim from inactive_list only
+         * 1 = Reclaim from active list but don't reclaim mapped
+         * 2 = 2nd pass of type 1
+         * 3 = Reclaim mapped (normal reclaim)
+         * 4 = 2nd pass of type 3
+         */
+        for (pass = 0; pass < 5; pass++) {
+                int prio;
+
+                /* Needed for shrinking slab caches later on */
+                if (!lru_pages)
+                        for_each_zone(zone) {
+                                lru_pages += zone->nr_active;
+                                lru_pages += zone->nr_inactive;
+                        }
+
+                /* Force reclaiming mapped pages in the passes #3 and #4 */
+                if (pass > 2) {
+                        sc.may_swap = 1;
+                        sc.swappiness = 100;
+                }
+
+                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
+                        unsigned long nr_to_scan = nr_pages - ret;
+
+                        sc.nr_mapped = read_page_state(nr_mapped);
+                        sc.nr_scanned = 0;
+
+                        ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
+                        if (ret >= nr_pages)
+                                goto out;
+
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                        if (ret >= nr_pages)
+                                goto out;
+
+                        if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+                                blk_congestion_wait(WRITE, HZ / 10);
+                }
+
+                lru_pages = 0;
         }
-        if (retry-- && ret < nr_pages) {
+
+        /*
+         * If ret = 0, we could not shrink LRUs, but there may be something
+         * in slab caches
+         */
+        if (!ret)
+                do {
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+
+out:
         current->reclaim_state = NULL;
+
         return ret;
 }
 #endif
@@ -1422,6 +1528,7 @@ static int __zone_reclaim(struct zone *z
                 .swap_cluster_max = max_t(unsigned long, nr_pages,
                                         SWAP_CLUSTER_MAX),
                 .gfp_mask = gfp_mask,
+                .swappiness = vm_swappiness,
         };

         disable_swap_token();