Contents of /trunk/kernel26-alx/patches-2.6.17-r6/0005-2.6.17-sched-staircase16_compute_tunable.patch
Parent Directory | Revision Log
Revision 199 -
(show annotations)
(download)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 6654 byte(s)
-import
1 | Add the compute tunable for the staircase cpu scheduler. When enabled, it |
2 | gives tasks significantly longer cpu timeslices and delays normal preemption |
3 | to minimise the cache-harming effects of multiple concurrently running |
4 | tasks. This increases cpu throughput at the cost of significantly increased |
5 | latencies. |
6 | |
7 | Signed-off-by: Con Kolivas <kernel@kolivas.org> |
8 | |
9 | Documentation/sysctl/kernel.txt | 11 +++++++++++ |
10 | include/linux/sched.h | 2 +- |
11 | include/linux/sysctl.h | 1 + |
12 | kernel/sched.c | 40 ++++++++++++++++++++++++++++++++-------- |
13 | kernel/sysctl.c | 8 ++++++++ |
14 | 5 files changed, 53 insertions(+), 9 deletions(-) |
15 | |
16 | Index: linux-ck-dev/include/linux/sched.h |
17 | =================================================================== |
18 | --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:07.000000000 +1000 |
19 | +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:21.000000000 +1000 |
20 | @@ -202,7 +202,7 @@ extern void show_stack(struct task_struc |
21 | |
22 | void io_schedule(void); |
23 | long io_schedule_timeout(long timeout); |
24 | -extern int sched_interactive; |
25 | +extern int sched_interactive, sched_compute; |
26 | |
27 | extern void cpu_init (void); |
28 | extern void trap_init(void); |
29 | Index: linux-ck-dev/include/linux/sysctl.h |
30 | =================================================================== |
31 | --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:07.000000000 +1000 |
32 | +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000 |
33 | @@ -149,6 +149,7 @@ enum |
34 | KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ |
35 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ |
36 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ |
37 | + KERN_COMPUTE=74, /* adjust timeslices for a compute server */ |
38 | }; |
39 | |
40 | |
41 | Index: linux-ck-dev/kernel/sched.c |
42 | =================================================================== |
43 | --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:07.000000000 +1000 |
44 | +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:21.000000000 +1000 |
45 | @@ -60,8 +60,17 @@ |
46 | /* |
47 | * sched_interactive - sysctl which allows interactive tasks to have bonus |
48 | * raise its priority. |
49 | + * sched_compute - sysctl which enables long timeslices and delayed preemption |
50 | + * for compute server usage. |
51 | */ |
52 | int sched_interactive __read_mostly = 1; |
53 | +int sched_compute __read_mostly; |
54 | + |
55 | +/* |
56 | + * CACHE_DELAY is the time preemption is delayed in sched_compute mode |
57 | + * and is set to a nominal 10ms. |
58 | + */ |
59 | +#define CACHE_DELAY (10 * (HZ) / 1001 + 1) |
60 | |
61 | /* |
62 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
63 | @@ -92,9 +101,10 @@ int sched_interactive __read_mostly = 1; |
64 | |
65 | /* |
66 | * This is the time all tasks within the same priority round robin. |
67 | - * Set to a minimum of 6ms. |
68 | + * Set to a minimum of 6ms. It is 10 times longer in compute mode. |
69 | */ |
70 | -#define RR_INTERVAL ((6 * HZ / 1001) + 1) |
71 | +#define _RR_INTERVAL ((6 * HZ / 1001) + 1) |
72 | +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute)) |
73 | #define DEF_TIMESLICE (RR_INTERVAL * 19) |
74 | |
75 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ |
76 | @@ -135,6 +145,7 @@ struct runqueue { |
77 | unsigned long nr_uninterruptible; |
78 | |
79 | unsigned long long timestamp_last_tick; |
80 | + unsigned short cache_ticks, preempted; |
81 | task_t *curr, *idle; |
82 | struct mm_struct *prev_mm; |
83 | unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; |
84 | @@ -737,7 +748,7 @@ static int effective_prio(const task_t * |
85 | |
86 | best_bonus = bonus(p); |
87 | prio = MAX_RT_PRIO + best_bonus; |
88 | - if (sched_interactive && !batch_task(p)) |
89 | + if (sched_interactive && !sched_compute && !batch_task(p)) |
90 | prio -= p->bonus; |
91 | |
92 | rr = rr_interval(p); |
93 | @@ -1209,12 +1220,21 @@ static inline int wake_idle(int cpu, tas |
94 | #endif |
95 | |
96 | /* |
97 | - * Check to see if p preempts rq->curr and resched if it does. |
98 | + * Check to see if p preempts rq->curr and resched if it does. In compute |
99 | + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted. |
100 | */ |
101 | -static inline void preempt(const task_t *p, runqueue_t *rq) |
102 | +static void fastcall preempt(const task_t *p, runqueue_t *rq) |
103 | { |
104 | - if (TASK_PREEMPTS_CURR(p, rq)) |
105 | - resched_task(rq->curr); |
106 | + task_t *curr = rq->curr; |
107 | + |
108 | + if (p->prio >= curr->prio) |
109 | + return; |
110 | + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm || |
111 | + rt_task(p) || curr == rq->idle) { |
112 | + resched_task(curr); |
113 | + return; |
114 | + } |
115 | + rq->preempted = 1; |
116 | } |
117 | |
118 | /*** |
119 | @@ -2667,6 +2687,9 @@ void scheduler_tick(void) |
120 | time_slice_expired(p, rq); |
121 | goto out_unlock; |
122 | } |
123 | + rq->cache_ticks++; |
124 | + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY) |
125 | + set_tsk_need_resched(p); |
126 | out_unlock: |
127 | spin_unlock(&rq->lock); |
128 | out: |
129 | @@ -2933,6 +2956,7 @@ switch_tasks: |
130 | |
131 | sched_info_switch(prev, next); |
132 | if (likely(prev != next)) { |
133 | + rq->preempted = rq->cache_ticks = 0; |
134 | next->timestamp = now; |
135 | rq->nr_switches++; |
136 | rq->curr = next; |
137 | @@ -5971,7 +5995,7 @@ void __init sched_init(void) |
138 | |
139 | rq = cpu_rq(i); |
140 | spin_lock_init(&rq->lock); |
141 | - rq->nr_running = 0; |
142 | + rq->nr_running = rq->cache_ticks = rq->preempted = 0; |
143 | |
144 | #ifdef CONFIG_SMP |
145 | rq->sd = NULL; |
146 | Index: linux-ck-dev/kernel/sysctl.c |
147 | =================================================================== |
148 | --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:07.000000000 +1000 |
149 | +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000 |
150 | @@ -631,6 +631,14 @@ static ctl_table kern_table[] = { |
151 | .mode = 0644, |
152 | .proc_handler = &proc_dointvec, |
153 | }, |
154 | + { |
155 | + .ctl_name = KERN_COMPUTE, |
156 | + .procname = "compute", |
157 | + .data = &sched_compute, |
158 | + .maxlen = sizeof (int), |
159 | + .mode = 0644, |
160 | + .proc_handler = &proc_dointvec, |
161 | + }, |
162 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
163 | { |
164 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, |
165 | Index: linux-ck-dev/Documentation/sysctl/kernel.txt |
166 | =================================================================== |
167 | --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:07.000000000 +1000 |
168 | +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000 |
169 | @@ -18,6 +18,7 @@ Currently, these files might (depending |
170 | show up in /proc/sys/kernel: |
171 | - acpi_video_flags |
172 | - acct |
173 | +- compute |
174 | - core_pattern |
175 | - core_uses_pid |
176 | - ctrl-alt-del |
177 | @@ -85,6 +86,16 @@ valid for 30 seconds. |
178 | |
179 | ============================================================== |
180 | |
181 | +compute: |
182 | + |
183 | +This flag enables the long-timeslice, delayed-preemption mode of the |
184 | +cpu scheduler, which is suitable for scientific computation applications. |
185 | +It leads to large latencies, so it is unsuitable for normal usage. |
186 | + |
187 | +Disabled by default. |
188 | + |
189 | +============================================================== |
190 | + |
191 | core_pattern: |
192 | |
193 | core_pattern is used to specify a core dumpfile pattern name. |