Set the initial "priority" of memory reclaim scanning according to the cpu
scheduling priority of the task, thus determining how aggressively reclaim
initially progresses according to nice level.

Signed-off-by: Con Kolivas

 fs/buffer.c          |    2 +-
 include/linux/swap.h |    3 ++-
 mm/page_alloc.c      |    2 +-
 mm/vmscan.c          |   37 ++++++++++++++++++++++++-------------
 4 files changed, 28 insertions(+), 16 deletions(-)

Index: linux-2.6.21-ck2/fs/buffer.c
===================================================================
--- linux-2.6.21-ck2.orig/fs/buffer.c	2007-05-14 19:49:18.000000000 +1000
+++ linux-2.6.21-ck2/fs/buffer.c	2007-05-14 19:49:56.000000000 +1000
@@ -363,7 +363,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, GFP_NOFS, NULL);
 	}
 }
 
Index: linux-2.6.21-ck2/include/linux/swap.h
===================================================================
--- linux-2.6.21-ck2.orig/include/linux/swap.h	2007-05-14 19:49:55.000000000 +1000
+++ linux-2.6.21-ck2/include/linux/swap.h	2007-05-14 19:49:56.000000000 +1000
@@ -189,7 +189,8 @@ extern int rotate_reclaimable_page(struc
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **, gfp_t,
+					struct task_struct *p);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_mapped;
 extern int vm_hardmaplimit;
Index: linux-2.6.21-ck2/mm/page_alloc.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/page_alloc.c	2007-05-14 19:49:55.000000000 +1000
+++ linux-2.6.21-ck2/mm/page_alloc.c	2007-05-14 19:49:56.000000000 +1000
@@ -1341,7 +1341,7 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask, p);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
Index: linux-2.6.21-ck2/mm/vmscan.c
===================================================================
--- linux-2.6.21-ck2.orig/mm/vmscan.c	2007-05-14 19:49:55.000000000 +1000
+++ linux-2.6.21-ck2/mm/vmscan.c	2007-05-14 19:49:56.000000000 +1000
@@ -993,6 +993,11 @@ static void set_kswapd_nice(struct task_
 		set_user_nice(kswapd, nice);
 }
 
+static int sc_priority(struct task_struct *p)
+{
+	return (DEF_PRIORITY + (DEF_PRIORITY * effective_sc_prio(p) / 40));
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1050,7 +1055,8 @@ static unsigned long shrink_zones(int pr
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
+					struct task_struct *p)
 {
 	int priority;
 	int ret = 0;
@@ -1058,7 +1064,7 @@ unsigned long try_to_free_pages(struct z
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
-	int i;
+	int i, scan_priority = DEF_PRIORITY;
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
@@ -1067,6 +1073,9 @@ unsigned long try_to_free_pages(struct z
 		.mapped = vm_mapped,
 	};
 
+	if (p)
+		scan_priority = sc_priority(p);
+
 	delay_swap_prefetch();
 
 	count_vm_event(ALLOCSTALL);
@@ -1081,7 +1090,7 @@ unsigned long try_to_free_pages(struct z
 			+ zone_page_state(zone, NR_INACTIVE);
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for (priority = scan_priority; priority >= 0; priority--) {
 		sc.nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
@@ -1111,7 +1120,7 @@ unsigned long try_to_free_pages(struct z
 		}
 
 		/* Take a nap, wait for some writeback to complete */
-		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
+		if (sc.nr_scanned && priority < scan_priority - 2)
 			congestion_wait(WRITE, HZ/10);
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
@@ -1161,9 +1170,9 @@ out:
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-	int all_zones_ok;
+	int all_zones_ok = 0;
 	int priority;
-	int i;
+	int i, scan_priority;
 	unsigned long total_scanned;
 	unsigned long nr_reclaimed;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -1179,6 +1188,8 @@ static unsigned long balance_pgdat(pg_da
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
+	scan_priority = sc_priority(pgdat->kswapd);
+
 loop_again:
 	total_scanned = 0;
 	nr_reclaimed = 0;
@@ -1186,9 +1197,9 @@ loop_again:
 	count_vm_event(PAGEOUTRUN);
 
 	for (i = 0; i < pgdat->nr_zones; i++)
-		temp_priority[i] = DEF_PRIORITY;
+		temp_priority[i] = scan_priority;
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for (priority = scan_priority; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
@@ -1209,7 +1220,7 @@ loop_again:
 			if (!populated_zone(zone))
 				continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+			if (zone->all_unreclaimable && priority != scan_priority)
 				continue;
 
 			/*
@@ -1218,7 +1229,7 @@ loop_again:
 			 * pages_high.
 			 */
 			watermark = zone->pages_high + (zone->pages_high *
-				priority / DEF_PRIORITY);
+				priority / scan_priority);
 			if (!zone_watermark_ok(zone, order, watermark, 0, 0)) {
 				end_zone = i;
 				break;
@@ -1251,11 +1262,11 @@ loop_again:
 			if (!populated_zone(zone))
 				continue;
 
-			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+			if (zone->all_unreclaimable && priority != scan_priority)
 				continue;
 
 			watermark = zone->pages_high + (zone->pages_high *
-				priority / DEF_PRIORITY);
+				priority / scan_priority);
 
 			if (!zone_watermark_ok(zone, order, watermark,
 					       end_zone, 0))
@@ -1290,7 +1301,7 @@ loop_again:
 		 * OK, kswapd is getting into trouble. Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2)
+		if (total_scanned && priority < scan_priority - 2)
 			congestion_wait(WRITE, HZ/10);
 
 		/*
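
For illustration only (not part of the patch): the mapping that sc_priority()
produces can be checked with the small userspace sketch below. It assumes
DEF_PRIORITY is 12 (as in 2.6.21) and that effective_sc_prio(), which comes
from the -ck scheduler changes and is not shown in this diff, returns a
nice-like value in roughly the -20..19 range; the helper name
initial_scan_priority() is made up for the sketch. Since each reclaim pass
scans on the order of lru_size >> priority pages, a larger starting value
means gentler initial scanning.

/* sketch.c - illustrative only, mirrors the sc_priority() arithmetic */
#include <stdio.h>

#define DEF_PRIORITY 12	/* value used by 2.6.21 */

/* Same arithmetic as sc_priority(), taking the nice-like value directly. */
static int initial_scan_priority(int effective_prio)
{
	return DEF_PRIORITY + (DEF_PRIORITY * effective_prio / 40);
}

int main(void)
{
	int prio;

	for (prio = -20; prio <= 19; prio++)
		printf("effective prio %3d -> initial scan priority %2d\n",
		       prio, initial_scan_priority(prio));
	return 0;
}

With these assumptions the initial scan priority ranges from 6 for the most
favoured tasks up to 17 for nice 19, with nice 0 tasks (and an unniced kswapd)
keeping the usual DEF_PRIORITY of 12.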