Magellan Linux

Annotation of /trunk/kernel26-alx/patches-2.6.17-r7/0005-2.6.17-sched-staircase16_compute_tunable.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Fri May 18 11:04:36 2007 UTC (17 years ago) by niro
File size: 6654 byte(s)
-import

1 niro 199 Add the compute tunable for the staircase cpu scheduler. When enabled, it
2     gives tasks significantly longer cpu timeslices and delays normal
3     preemption to minimise the cpu-cache-harming effects of multiple
4     concurrently running tasks. This increases cpu throughput at the cost of
5     significantly increased latencies.
6    
7     Signed-off-by: Con Kolivas <kernel@kolivas.org>
8    
9     Documentation/sysctl/kernel.txt | 11 +++++++++++
10     include/linux/sched.h | 2 +-
11     include/linux/sysctl.h | 1 +
12     kernel/sched.c | 40 ++++++++++++++++++++++++++++++++--------
13     kernel/sysctl.c | 8 ++++++++
14     5 files changed, 53 insertions(+), 9 deletions(-)
15    
16     Index: linux-ck-dev/include/linux/sched.h
17     ===================================================================
18     --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:07.000000000 +1000
19     +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:21.000000000 +1000
20     @@ -202,7 +202,7 @@ extern void show_stack(struct task_struc
21    
22     void io_schedule(void);
23     long io_schedule_timeout(long timeout);
24     -extern int sched_interactive;
25     +extern int sched_interactive, sched_compute;
26    
27     extern void cpu_init (void);
28     extern void trap_init(void);
29     Index: linux-ck-dev/include/linux/sysctl.h
30     ===================================================================
31     --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:07.000000000 +1000
32     +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000
33     @@ -149,6 +149,7 @@ enum
34     KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
35     KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
36     KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */
37     + KERN_COMPUTE=74, /* adjust timeslices for a compute server */
38     };
39    
40    
41     Index: linux-ck-dev/kernel/sched.c
42     ===================================================================
43     --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:07.000000000 +1000
44     +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:21.000000000 +1000
45     @@ -60,8 +60,17 @@
46     /*
47     * sched_interactive - sysctl which allows interactive tasks to have bonus
48     * raise its priority.
49     + * sched_compute - sysctl which enables long timeslices and delayed preemption
50     + * for compute server usage.
51     */
52     int sched_interactive __read_mostly = 1;
53     +int sched_compute __read_mostly;
54     +
55     +/*
56     + * CACHE_DELAY is the time preemption is delayed in sched_compute mode
57     + * and is set to a nominal 10ms.
58     + */
59     +#define CACHE_DELAY (10 * (HZ) / 1001 + 1)
60    
61     /*
62     * Convert user-nice values [ -20 ... 0 ... 19 ]
63     @@ -92,9 +101,10 @@ int sched_interactive __read_mostly = 1;
64    
65     /*
66     * This is the time all tasks within the same priority round robin.
67     - * Set to a minimum of 6ms.
68     + * Set to a minimum of 6ms. It is 10 times longer in compute mode.
69     */
70     -#define RR_INTERVAL ((6 * HZ / 1001) + 1)
71     +#define _RR_INTERVAL ((6 * HZ / 1001) + 1)
72     +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute))
73     #define DEF_TIMESLICE (RR_INTERVAL * 19)
74    
75     #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \
76     @@ -135,6 +145,7 @@ struct runqueue {
77     unsigned long nr_uninterruptible;
78    
79     unsigned long long timestamp_last_tick;
80     + unsigned short cache_ticks, preempted;
81     task_t *curr, *idle;
82     struct mm_struct *prev_mm;
83     unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
84     @@ -737,7 +748,7 @@ static int effective_prio(const task_t *
85    
86     best_bonus = bonus(p);
87     prio = MAX_RT_PRIO + best_bonus;
88     - if (sched_interactive && !batch_task(p))
89     + if (sched_interactive && !sched_compute && !batch_task(p))
90     prio -= p->bonus;
91    
92     rr = rr_interval(p);
93     @@ -1209,12 +1220,21 @@ static inline int wake_idle(int cpu, tas
94     #endif
95    
96     /*
97     - * Check to see if p preempts rq->curr and resched if it does.
98     + * Check to see if p preempts rq->curr and resched if it does. In compute
99     + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted.
100     */
101     -static inline void preempt(const task_t *p, runqueue_t *rq)
102     +static void fastcall preempt(const task_t *p, runqueue_t *rq)
103     {
104     - if (TASK_PREEMPTS_CURR(p, rq))
105     - resched_task(rq->curr);
106     + task_t *curr = rq->curr;
107     +
108     + if (p->prio >= curr->prio)
109     + return;
110     + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
111     + rt_task(p) || curr == rq->idle) {
112     + resched_task(curr);
113     + return;
114     + }
115     + rq->preempted = 1;
116     }
117    
118     /***
119     @@ -2667,6 +2687,9 @@ void scheduler_tick(void)
120     time_slice_expired(p, rq);
121     goto out_unlock;
122     }
123     + rq->cache_ticks++;
124     + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY)
125     + set_tsk_need_resched(p);
126     out_unlock:
127     spin_unlock(&rq->lock);
128     out:
129     @@ -2933,6 +2956,7 @@ switch_tasks:
130    
131     sched_info_switch(prev, next);
132     if (likely(prev != next)) {
133     + rq->preempted = rq->cache_ticks = 0;
134     next->timestamp = now;
135     rq->nr_switches++;
136     rq->curr = next;
137     @@ -5971,7 +5995,7 @@ void __init sched_init(void)
138    
139     rq = cpu_rq(i);
140     spin_lock_init(&rq->lock);
141     - rq->nr_running = 0;
142     + rq->nr_running = rq->cache_ticks = rq->preempted = 0;
143    
144     #ifdef CONFIG_SMP
145     rq->sd = NULL;
146     Index: linux-ck-dev/kernel/sysctl.c
147     ===================================================================
148     --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:07.000000000 +1000
149     +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000
150     @@ -631,6 +631,14 @@ static ctl_table kern_table[] = {
151     .mode = 0644,
152     .proc_handler = &proc_dointvec,
153     },
154     + {
155     + .ctl_name = KERN_COMPUTE,
156     + .procname = "compute",
157     + .data = &sched_compute,
158     + .maxlen = sizeof (int),
159     + .mode = 0644,
160     + .proc_handler = &proc_dointvec,
161     + },
162     #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
163     {
164     .ctl_name = KERN_UNKNOWN_NMI_PANIC,
165     Index: linux-ck-dev/Documentation/sysctl/kernel.txt
166     ===================================================================
167     --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:07.000000000 +1000
168     +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000
169     @@ -18,6 +18,7 @@ Currently, these files might (depending
170     show up in /proc/sys/kernel:
171     - acpi_video_flags
172     - acct
173     +- compute
174     - core_pattern
175     - core_uses_pid
176     - ctrl-alt-del
177     @@ -85,6 +86,16 @@ valid for 30 seconds.
178    
179     ==============================================================
180    
181     +compute:
182     +
183     +This flag controls the long-timeslice, delayed-preemption mode of the
184     +cpu scheduler, which is suitable for scientific computation workloads.
185     +It leads to large latencies, so it is unsuitable for normal usage.
186     +
187     +Disabled by default.
188     +
189     +==============================================================
190     +
191     core_pattern:
192    
193     core_pattern is used to specify a core dumpfile pattern name.