Contents of /trunk/kernel26-alx/patches-2.6.17-r7/0007-2.6.17-sched-iso-4.5.patch
Parent Directory | Revision Log
Revision 199 -
(show annotations)
(download)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 10402 byte(s)
-import
1 | Add the SCHED_ISO policy (isochronous) which is a starvation free soft |
2 | realtime policy available to unprivileged users. The amount of cpu that |
3 | SCHED_ISO tasks will run as realtime is configurable by the tunable in |
4 | |
5 | /proc/sys/kernel/iso_cpu |
6 | |
7 | and is set to 80% (over 5 seconds) by default. |
8 | |
9 | Signed-off-by: Con Kolivas <kernel@kolivas.org> |
10 | |
11 | Documentation/sysctl/kernel.txt | 9 ++++ |
12 | include/linux/sched.h | 10 +++-- |
13 | include/linux/sysctl.h | 1 |
14 | kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++---- |
15 | kernel/sysctl.c | 22 ++++++++--- |
16 | 5 files changed, 104 insertions(+), 15 deletions(-) |
17 | |
18 | Index: linux-ck-dev/include/linux/sched.h |
19 | =================================================================== |
20 | --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:35.000000000 +1000 |
21 | +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:38.000000000 +1000 |
22 | @@ -164,9 +164,10 @@ extern unsigned long weighted_cpuload(co |
23 | #define SCHED_FIFO 1 |
24 | #define SCHED_RR 2 |
25 | #define SCHED_BATCH 3 |
26 | +#define SCHED_ISO 4 |
27 | |
28 | #define SCHED_MIN 0 |
29 | -#define SCHED_MAX 3 |
30 | +#define SCHED_MAX 4 |
31 | |
32 | #define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) |
33 | #define SCHED_RT(policy) ((policy) == SCHED_FIFO || \ |
34 | @@ -209,7 +210,7 @@ extern void show_stack(struct task_struc |
35 | |
36 | void io_schedule(void); |
37 | long io_schedule_timeout(long timeout); |
38 | -extern int sched_interactive, sched_compute; |
39 | +extern int sched_interactive, sched_compute, sched_iso_cpu; |
40 | |
41 | extern void cpu_init (void); |
42 | extern void trap_init(void); |
43 | @@ -489,12 +490,14 @@ struct signal_struct { |
44 | |
45 | #define MAX_USER_RT_PRIO 100 |
46 | #define MAX_RT_PRIO MAX_USER_RT_PRIO |
47 | +#define ISO_PRIO (MAX_RT_PRIO - 1) |
48 | |
49 | #define MAX_PRIO (MAX_RT_PRIO + 40) |
50 | #define MIN_USER_PRIO (MAX_PRIO - 1) |
51 | |
52 | -#define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) |
53 | +#define rt_task(p) (unlikely(SCHED_RT((p)->policy))) |
54 | #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) |
55 | +#define iso_task(p) (unlikely((p)->policy == SCHED_ISO)) |
56 | |
57 | /* |
58 | * Some day this will be a full-fledged user tracking system.. |
59 | @@ -954,6 +957,7 @@ static inline void put_task_struct(struc |
60 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
61 | #define PF_NONSLEEP 0x20000000 /* Waiting on in kernel activity */ |
62 | #define PF_FORKED 0x40000000 /* Task just forked another process */ |
63 | +#define PF_ISOREF 0x80000000 /* SCHED_ISO task has used up quota */ |
64 | |
65 | /* |
66 | * Only the _current_ task can read/write to tsk->flags, but other |
67 | Index: linux-ck-dev/include/linux/sysctl.h |
68 | =================================================================== |
69 | --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000 |
70 | +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:38.000000000 +1000 |
71 | @@ -150,6 +150,7 @@ enum |
72 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ |
73 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ |
74 | KERN_COMPUTE=74, /* adjust timeslices for a compute server */ |
75 | + KERN_ISO_CPU=75, /* percent cpu SCHED_ISO tasks run SCHED_RR */ |
76 | }; |
77 | |
78 | |
79 | Index: linux-ck-dev/kernel/sched.c |
80 | =================================================================== |
81 | --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:35.000000000 +1000 |
82 | +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:38.000000000 +1000 |
83 | @@ -62,10 +62,14 @@ |
84 | * raise its priority. |
85 | * sched_compute - sysctl which enables long timeslices and delayed preemption |
86 | * for compute server usage. |
87 | + * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks |
88 | + * are allowed to run (over ISO_PERIOD seconds) as real time tasks. |
89 | */ |
90 | int sched_interactive __read_mostly = 1; |
91 | int sched_compute __read_mostly; |
92 | +int sched_iso_cpu __read_mostly = 80; |
93 | |
94 | +#define ISO_PERIOD (5 * HZ) |
95 | /* |
96 | * CACHE_DELAY is the time preemption is delayed in sched_compute mode |
97 | * and is set to a nominal 10ms. |
98 | @@ -146,6 +150,9 @@ struct runqueue { |
99 | |
100 | unsigned long long timestamp_last_tick; |
101 | unsigned short cache_ticks, preempted; |
102 | + unsigned long iso_ticks; |
103 | + unsigned short iso_refractory; |
104 | + |
105 | task_t *curr, *idle; |
106 | struct mm_struct *prev_mm; |
107 | unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; |
108 | @@ -742,6 +749,17 @@ static int effective_prio(const task_t * |
109 | if (rt_task(p)) |
110 | return p->prio; |
111 | |
112 | + if (iso_task(p)) { |
113 | + if (likely(!(p->flags & PF_ISOREF))) |
114 | + /* |
115 | + * If SCHED_ISO tasks have not used up their real time |
116 | + * quota they have run just better than highest |
117 | + * SCHED_NORMAL priority. Otherwise they run as |
118 | + * SCHED_NORMAL. |
119 | + */ |
120 | + return ISO_PRIO; |
121 | + } |
122 | + |
123 | full_slice = slice(p); |
124 | if (full_slice > p->slice) |
125 | used_slice = full_slice - p->slice; |
126 | @@ -2632,6 +2650,22 @@ static void time_slice_expired(task_t *p |
127 | } |
128 | |
129 | /* |
130 | + * Test if SCHED_ISO tasks have run longer than their allotted period as RT |
131 | + * tasks and set the refractory flag if necessary. There is 10% hysteresis |
132 | + * for unsetting the flag. |
133 | + */ |
134 | +static inline unsigned int test_ret_isorefractory(runqueue_t *rq) |
135 | +{ |
136 | + if (likely(!rq->iso_refractory)) { |
137 | + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu) |
138 | + rq->iso_refractory = 1; |
139 | + } else |
140 | + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100)) |
141 | + rq->iso_refractory = 0; |
142 | + return rq->iso_refractory; |
143 | +} |
144 | + |
145 | +/* |
146 | * This function gets called by the timer code, with HZ frequency. |
147 | * We call it with interrupts disabled. |
148 | */ |
149 | @@ -2659,11 +2693,29 @@ void scheduler_tick(void) |
150 | set_tsk_need_resched(p); |
151 | goto out; |
152 | } |
153 | - /* SCHED_FIFO tasks never run out of timeslice. */ |
154 | - if (unlikely(p->policy == SCHED_FIFO)) |
155 | - goto out; |
156 | |
157 | spin_lock(&rq->lock); |
158 | + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) && |
159 | + p->mm)) { |
160 | + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100) |
161 | + rq->iso_ticks += 100; |
162 | + } else |
163 | + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; |
164 | + |
165 | + if (iso_task(p)) { |
166 | + if (unlikely(test_ret_isorefractory(rq))) { |
167 | + if (!(p->flags & PF_ISOREF)) { |
168 | + set_tsk_need_resched(p); |
169 | + p->flags |= PF_ISOREF; |
170 | + } |
171 | + } else |
172 | + p->flags &= ~PF_ISOREF; |
173 | + } else |
174 | + /* SCHED_FIFO tasks never run out of timeslice. */ |
175 | + if (unlikely(p->policy == SCHED_FIFO)) |
176 | + goto out_unlock; |
177 | + |
178 | + |
179 | debit = ns_diff(rq->timestamp_last_tick, p->timestamp); |
180 | p->ns_debit += debit; |
181 | if (p->ns_debit < NSJIFFY) |
182 | @@ -2758,7 +2810,7 @@ static int dependent_sleeper(int this_cp |
183 | int ret = 0, i; |
184 | |
185 | /* kernel/rt threads do not participate in dependent sleeping */ |
186 | - if (!p->mm || rt_task(p)) |
187 | + if (!p->mm || rt_task(p) || iso_task(p)) |
188 | return 0; |
189 | |
190 | for_each_domain(this_cpu, tmp) { |
191 | @@ -2795,7 +2847,7 @@ static int dependent_sleeper(int this_cp |
192 | * task from using an unfair proportion of the |
193 | * physical cpu's resources. -ck |
194 | */ |
195 | - if (rt_task(smt_curr)) { |
196 | + if (rt_task(smt_curr) || iso_task(smt_curr)) { |
197 | /* |
198 | * With real time tasks we run non-rt tasks only |
199 | * per_cpu_gain% of the time. |
200 | @@ -3567,9 +3619,19 @@ int sched_setscheduler(struct task_struc |
201 | { |
202 | int retval; |
203 | int queued, oldprio, oldpolicy = -1; |
204 | + struct sched_param zero_param = { .sched_priority = 0 }; |
205 | unsigned long flags; |
206 | runqueue_t *rq; |
207 | |
208 | + if (SCHED_RT(policy) && !capable(CAP_SYS_NICE)) { |
209 | + /* |
210 | + * If the caller requested an RT policy without having the |
211 | + * necessary rights, we downgrade the policy to SCHED_ISO. |
212 | + * We also set the parameter to zero to pass the checks. |
213 | + */ |
214 | + policy = SCHED_ISO; |
215 | + param = &zero_param; |
216 | + } |
217 | recheck: |
218 | /* double check policy once rq lock held */ |
219 | if (policy < 0) |
220 | @@ -4063,6 +4125,7 @@ asmlinkage long sys_sched_get_priority_m |
221 | break; |
222 | case SCHED_NORMAL: |
223 | case SCHED_BATCH: |
224 | + case SCHED_ISO: |
225 | ret = 0; |
226 | break; |
227 | } |
228 | @@ -4087,6 +4150,7 @@ asmlinkage long sys_sched_get_priority_m |
229 | break; |
230 | case SCHED_NORMAL: |
231 | case SCHED_BATCH: |
232 | + case SCHED_ISO: |
233 | ret = 0; |
234 | } |
235 | return ret; |
236 | @@ -5992,7 +6056,8 @@ void __init sched_init(void) |
237 | |
238 | rq = cpu_rq(i); |
239 | spin_lock_init(&rq->lock); |
240 | - rq->nr_running = rq->cache_ticks = rq->preempted = 0; |
241 | + rq->nr_running = rq->cache_ticks = rq->preempted = |
242 | + rq->iso_ticks = 0; |
243 | |
244 | #ifdef CONFIG_SMP |
245 | rq->sd = NULL; |
246 | Index: linux-ck-dev/kernel/sysctl.c |
247 | =================================================================== |
248 | --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000 |
249 | +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:38.000000000 +1000 |
250 | @@ -229,6 +229,11 @@ static ctl_table root_table[] = { |
251 | { .ctl_name = 0 } |
252 | }; |
253 | |
254 | +/* Constants for minimum and maximum testing. |
255 | + We use these as one-element integer vectors. */ |
256 | +static int zero; |
257 | +static int one_hundred = 100; |
258 | + |
259 | static ctl_table kern_table[] = { |
260 | { |
261 | .ctl_name = KERN_OSTYPE, |
262 | @@ -639,6 +644,17 @@ static ctl_table kern_table[] = { |
263 | .mode = 0644, |
264 | .proc_handler = &proc_dointvec, |
265 | }, |
266 | + { |
267 | + .ctl_name = KERN_ISO_CPU, |
268 | + .procname = "iso_cpu", |
269 | + .data = &sched_iso_cpu, |
270 | + .maxlen = sizeof (int), |
271 | + .mode = 0644, |
272 | + .proc_handler = &proc_dointvec_minmax, |
273 | + .strategy = &sysctl_intvec, |
274 | + .extra1 = &zero, |
275 | + .extra2 = &one_hundred, |
276 | + }, |
277 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
278 | { |
279 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, |
280 | @@ -702,12 +718,6 @@ static ctl_table kern_table[] = { |
281 | { .ctl_name = 0 } |
282 | }; |
283 | |
284 | -/* Constants for minimum and maximum testing in vm_table. |
285 | - We use these as one-element integer vectors. */ |
286 | -static int zero; |
287 | -static int one_hundred = 100; |
288 | - |
289 | - |
290 | static ctl_table vm_table[] = { |
291 | { |
292 | .ctl_name = VM_OVERCOMMIT_MEMORY, |
293 | Index: linux-ck-dev/Documentation/sysctl/kernel.txt |
294 | =================================================================== |
295 | --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000 |
296 | +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:38.000000000 +1000 |
297 | @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: |
298 | - hostname |
299 | - hotplug |
300 | - interactive |
301 | +- iso_cpu |
302 | - java-appletviewer [ binfmt_java, obsolete ] |
303 | - java-interpreter [ binfmt_java, obsolete ] |
304 | - l2cr [ PPC only ] |
305 | @@ -182,6 +183,14 @@ are obeyed if this tunable is disabled. |
306 | |
307 | ============================================================== |
308 | |
309 | +iso_cpu: |
310 | + |
311 | +This sets the percentage cpu that the unprivileged SCHED_ISO tasks can |
312 | +run effectively at realtime priority, averaged over a rolling 5 seconds. |
313 | +Set to 80% by default. |
314 | + |
315 | +============================================================== |
316 | + |
317 | l2cr: (PPC only) |
318 | |
319 | This flag controls the L2 cache of G3 processor boards. If |