Annotation of /trunk/kernel26-alx/patches-2.6.17-r7/0005-2.6.17-sched-staircase16_compute_tunable.patch
Parent Directory | Revision Log
Revision 199 -
(hide annotations)
(download)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 6654 byte(s)
-import
1 | niro | 199 | Add the compute tunable for the staircase cpu scheduler. When enabled, the |
2 | cpu scheduler uses significantly longer cpu timeslices and delays normal | ||
3 | preemption to minimise the cpu cache harming effects of multiple | ||
4 | concurrently running tasks. This increases cpu throughput at the cost of | ||
5 | significantly increased latencies. | ||
6 | |||
7 | Signed-off-by: Con Kolivas <kernel@kolivas.org> | ||
8 | |||
9 | Documentation/sysctl/kernel.txt | 11 +++++++++++ | ||
10 | include/linux/sched.h | 2 +- | ||
11 | include/linux/sysctl.h | 1 + | ||
12 | kernel/sched.c | 40 ++++++++++++++++++++++++++++++++-------- | ||
13 | kernel/sysctl.c | 8 ++++++++ | ||
14 | 5 files changed, 53 insertions(+), 9 deletions(-) | ||
15 | |||
16 | Index: linux-ck-dev/include/linux/sched.h | ||
17 | =================================================================== | ||
18 | --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:07.000000000 +1000 | ||
19 | +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:21.000000000 +1000 | ||
20 | @@ -202,7 +202,7 @@ extern void show_stack(struct task_struc | ||
21 | |||
22 | void io_schedule(void); | ||
23 | long io_schedule_timeout(long timeout); | ||
24 | -extern int sched_interactive; | ||
25 | +extern int sched_interactive, sched_compute; | ||
26 | |||
27 | extern void cpu_init (void); | ||
28 | extern void trap_init(void); | ||
29 | Index: linux-ck-dev/include/linux/sysctl.h | ||
30 | =================================================================== | ||
31 | --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:07.000000000 +1000 | ||
32 | +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000 | ||
33 | @@ -149,6 +149,7 @@ enum | ||
34 | KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ | ||
35 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ | ||
36 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ | ||
37 | + KERN_COMPUTE=74, /* adjust timeslices for a compute server */ | ||
38 | }; | ||
39 | |||
40 | |||
41 | Index: linux-ck-dev/kernel/sched.c | ||
42 | =================================================================== | ||
43 | --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:07.000000000 +1000 | ||
44 | +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:21.000000000 +1000 | ||
45 | @@ -60,8 +60,17 @@ | ||
46 | /* | ||
47 | * sched_interactive - sysctl which allows interactive tasks to have bonus | ||
48 | * raise its priority. | ||
49 | + * sched_compute - sysctl which enables long timeslices and delayed preemption | ||
50 | + * for compute server usage. | ||
51 | */ | ||
52 | int sched_interactive __read_mostly = 1; | ||
53 | +int sched_compute __read_mostly; | ||
54 | + | ||
55 | +/* | ||
56 | + * CACHE_DELAY is the time preemption is delayed in sched_compute mode | ||
57 | + * and is set to a nominal 10ms. | ||
58 | + */ | ||
59 | +#define CACHE_DELAY (10 * (HZ) / 1001 + 1) | ||
60 | |||
61 | /* | ||
62 | * Convert user-nice values [ -20 ... 0 ... 19 ] | ||
63 | @@ -92,9 +101,10 @@ int sched_interactive __read_mostly = 1; | ||
64 | |||
65 | /* | ||
66 | * This is the time all tasks within the same priority round robin. | ||
67 | - * Set to a minimum of 6ms. | ||
68 | + * Set to a minimum of 6ms. It is 10 times longer in compute mode. | ||
69 | */ | ||
70 | -#define RR_INTERVAL ((6 * HZ / 1001) + 1) | ||
71 | +#define _RR_INTERVAL ((6 * HZ / 1001) + 1) | ||
72 | +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute)) | ||
73 | #define DEF_TIMESLICE (RR_INTERVAL * 19) | ||
74 | |||
75 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ | ||
76 | @@ -135,6 +145,7 @@ struct runqueue { | ||
77 | unsigned long nr_uninterruptible; | ||
78 | |||
79 | unsigned long long timestamp_last_tick; | ||
80 | + unsigned short cache_ticks, preempted; | ||
81 | task_t *curr, *idle; | ||
82 | struct mm_struct *prev_mm; | ||
83 | unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; | ||
84 | @@ -737,7 +748,7 @@ static int effective_prio(const task_t * | ||
85 | |||
86 | best_bonus = bonus(p); | ||
87 | prio = MAX_RT_PRIO + best_bonus; | ||
88 | - if (sched_interactive && !batch_task(p)) | ||
89 | + if (sched_interactive && !sched_compute && !batch_task(p)) | ||
90 | prio -= p->bonus; | ||
91 | |||
92 | rr = rr_interval(p); | ||
93 | @@ -1209,12 +1220,21 @@ static inline int wake_idle(int cpu, tas | ||
94 | #endif | ||
95 | |||
96 | /* | ||
97 | - * Check to see if p preempts rq->curr and resched if it does. | ||
98 | + * Check to see if p preempts rq->curr and resched if it does. In compute | ||
99 | + * mode we defer preemption until CACHE_DELAY and set rq->preempted instead. | ||
100 | */ | ||
101 | -static inline void preempt(const task_t *p, runqueue_t *rq) | ||
102 | +static void fastcall preempt(const task_t *p, runqueue_t *rq) | ||
103 | { | ||
104 | - if (TASK_PREEMPTS_CURR(p, rq)) | ||
105 | - resched_task(rq->curr); | ||
106 | + task_t *curr = rq->curr; | ||
107 | + | ||
108 | + if (p->prio >= curr->prio) | ||
109 | + return; | ||
110 | + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm || | ||
111 | + rt_task(p) || curr == rq->idle) { | ||
112 | + resched_task(curr); | ||
113 | + return; | ||
114 | + } | ||
115 | + rq->preempted = 1; | ||
116 | } | ||
117 | |||
118 | /*** | ||
119 | @@ -2667,6 +2687,9 @@ void scheduler_tick(void) | ||
120 | time_slice_expired(p, rq); | ||
121 | goto out_unlock; | ||
122 | } | ||
123 | + rq->cache_ticks++; | ||
124 | + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY) | ||
125 | + set_tsk_need_resched(p); | ||
126 | out_unlock: | ||
127 | spin_unlock(&rq->lock); | ||
128 | out: | ||
129 | @@ -2933,6 +2956,7 @@ switch_tasks: | ||
130 | |||
131 | sched_info_switch(prev, next); | ||
132 | if (likely(prev != next)) { | ||
133 | + rq->preempted = rq->cache_ticks = 0; | ||
134 | next->timestamp = now; | ||
135 | rq->nr_switches++; | ||
136 | rq->curr = next; | ||
137 | @@ -5971,7 +5995,7 @@ void __init sched_init(void) | ||
138 | |||
139 | rq = cpu_rq(i); | ||
140 | spin_lock_init(&rq->lock); | ||
141 | - rq->nr_running = 0; | ||
142 | + rq->nr_running = rq->cache_ticks = rq->preempted = 0; | ||
143 | |||
144 | #ifdef CONFIG_SMP | ||
145 | rq->sd = NULL; | ||
146 | Index: linux-ck-dev/kernel/sysctl.c | ||
147 | =================================================================== | ||
148 | --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:07.000000000 +1000 | ||
149 | +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000 | ||
150 | @@ -631,6 +631,14 @@ static ctl_table kern_table[] = { | ||
151 | .mode = 0644, | ||
152 | .proc_handler = &proc_dointvec, | ||
153 | }, | ||
154 | + { | ||
155 | + .ctl_name = KERN_COMPUTE, | ||
156 | + .procname = "compute", | ||
157 | + .data = &sched_compute, | ||
158 | + .maxlen = sizeof (int), | ||
159 | + .mode = 0644, | ||
160 | + .proc_handler = &proc_dointvec, | ||
161 | + }, | ||
162 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
163 | { | ||
164 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, | ||
165 | Index: linux-ck-dev/Documentation/sysctl/kernel.txt | ||
166 | =================================================================== | ||
167 | --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:07.000000000 +1000 | ||
168 | +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000 | ||
169 | @@ -18,6 +18,7 @@ Currently, these files might (depending | ||
170 | show up in /proc/sys/kernel: | ||
171 | - acpi_video_flags | ||
172 | - acct | ||
173 | +- compute | ||
174 | - core_pattern | ||
175 | - core_uses_pid | ||
176 | - ctrl-alt-del | ||
177 | @@ -85,6 +86,16 @@ valid for 30 seconds. | ||
178 | |||
179 | ============================================================== | ||
180 | |||
181 | +compute: | ||
182 | + | ||
183 | +This flag enables (1) or disables (0) the long timeslice, delayed preemption | ||
184 | +mode in the cpu scheduler, which suits scientific computation applications. | ||
185 | +It leads to large latencies, so it is unsuitable for normal usage. | ||
186 | + | ||
187 | +Disabled by default. | ||
188 | + | ||
189 | +============================================================== | ||
190 | + | ||
191 | core_pattern: | ||
192 | |||
193 | core_pattern is used to specify a core dumpfile pattern name. |