The VM currently performs scanning when allocating RAM once the watermarks are below the pages_low value, and tries to restore them to the pages_high watermark. The disadvantage of this is that we scan most aggressively at the same time we are allocating RAM, regardless of the stress the VM is under. Add a pages_lots watermark and allow the watermark to be relaxed according to the stress the VM is under at the time (as indicated by the priority value). Thus we have more in reserve the next time we allocate RAM and end up scanning less aggressively. Note that pages_lots itself isn't used directly in this code.

Signed-off-by: Con Kolivas

 include/linux/mmzone.h |    2 +-
 mm/page_alloc.c        |    3 +++
 mm/vmscan.c            |   17 ++++++++++++++---
 3 files changed, 18 insertions(+), 4 deletions(-)

Index: linux-2.6.20-ck1/include/linux/mmzone.h
===================================================================
--- linux-2.6.20-ck1.orig/include/linux/mmzone.h	2007-02-05 22:52:04.000000000 +1100
+++ linux-2.6.20-ck1/include/linux/mmzone.h	2007-02-16 19:01:33.000000000 +1100
@@ -156,7 +156,7 @@ enum zone_type {
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_min, pages_low, pages_high, pages_lots;
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
Index: linux-2.6.20-ck1/mm/page_alloc.c
===================================================================
--- linux-2.6.20-ck1.orig/mm/page_alloc.c	2007-02-05 22:52:04.000000000 +1100
+++ linux-2.6.20-ck1/mm/page_alloc.c	2007-02-16 19:01:33.000000000 +1100
@@ -1604,6 +1604,7 @@ void show_free_areas(void)
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
+			" lots:%lukB"
 			" active:%lukB"
 			" inactive:%lukB"
 			" present:%lukB"
@@ -1615,6 +1616,7 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
+			K(zone->pages_lots),
 			K(zone->nr_active),
 			K(zone->nr_inactive),
 			K(zone->present_pages),
@@ -3149,6 +3151,7 @@ void setup_per_zone_pages_min(void)
 
 		zone->pages_low   = zone->pages_min + (tmp >> 2);
 		zone->pages_high  = zone->pages_min + (tmp >> 1);
+		zone->pages_lots  = zone->pages_min + tmp;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 
Index: linux-2.6.20-ck1/mm/vmscan.c
===================================================================
--- linux-2.6.20-ck1.orig/mm/vmscan.c	2007-02-16 19:01:33.000000000 +1100
+++ linux-2.6.20-ck1/mm/vmscan.c	2007-02-16 19:01:33.000000000 +1100
@@ -1172,6 +1172,7 @@ loop_again:
 		 */
 		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
 			struct zone *zone = pgdat->node_zones + i;
+			unsigned long watermark;
 
 			if (!populated_zone(zone))
 				continue;
@@ -1179,8 +1180,14 @@ loop_again:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			/*
+			 * The watermark is relaxed depending on the
+			 * level of "priority" till it drops to
+			 * pages_high.
+			 */
+			watermark = zone->pages_high + (zone->pages_high *
+				priority / DEF_PRIORITY);
+			if (!zone_watermark_ok(zone, order, watermark, 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1206,6 +1213,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
+			unsigned long watermark;
 
 			if (!populated_zone(zone))
 				continue;
@@ -1213,7 +1221,10 @@ loop_again:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
+			watermark = zone->pages_high + (zone->pages_high *
+				priority / DEF_PRIORITY);
+
+			if (!zone_watermark_ok(zone, order, watermark,
 					       end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
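
For illustration, a minimal userspace sketch (not part of the patch) of how the relaxed watermark computed in the vmscan.c hunks above behaves. The function name relaxed_watermark and the pages_high figure are made up for the example; DEF_PRIORITY is 12, as in this kernel.

#include <stdio.h>

#define DEF_PRIORITY	12

static unsigned long relaxed_watermark(unsigned long pages_high, int priority)
{
	/*
	 * At priority == DEF_PRIORITY (the first, gentlest scanning pass)
	 * the target is twice pages_high, roughly the new pages_lots
	 * level (which, as the changelog notes, is not used directly).
	 * As priority falls towards 0 under increasing reclaim pressure,
	 * the target relaxes back down to pages_high.
	 */
	return pages_high + (pages_high * priority / DEF_PRIORITY);
}

int main(void)
{
	unsigned long pages_high = 1024;	/* example per-zone value */
	int priority;

	for (priority = DEF_PRIORITY; priority >= 0; priority--)
		printf("priority %2d -> watermark target %lu pages\n",
		       priority, relaxed_watermark(pages_high, priority));
	return 0;
}

In other words, kswapd keeps zones topped up well above pages_high when reclaim is easy, and only falls back to the old pages_high target once it is already struggling.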