Annotation of /trunk/kernel26-alx/patches-2.6.17-r5/0007-2.6.17-sched-iso-4.5.patch
Parent Directory | Revision Log
Revision 199 -
(hide annotations)
(download)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 10402 byte(s)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 10402 byte(s)
-import
1 | niro | 199 | Add the SCHED_ISO policy (isochronous) which is a starvation free soft |
2 | realtime policy available to unprivileged users. The amount of cpu that | ||
3 | SCHED_ISO tasks will run as realtime is configurable by the tunable in | ||
4 | |||
5 | /proc/sys/kernel/iso_cpu | ||
6 | |||
7 | and is set to 80% (over 5 seconds) by default. | ||
8 | |||
9 | Signed-off-by: Con Kolivas <kernel@kolivas.org> | ||
10 | |||
11 | Documentation/sysctl/kernel.txt | 9 ++++ | ||
12 | include/linux/sched.h | 10 +++-- | ||
13 | include/linux/sysctl.h | 1 | ||
14 | kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++---- | ||
15 | kernel/sysctl.c | 22 ++++++++--- | ||
16 | 5 files changed, 104 insertions(+), 15 deletions(-) | ||
17 | |||
18 | Index: linux-ck-dev/include/linux/sched.h | ||
19 | =================================================================== | ||
20 | --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:35.000000000 +1000 | ||
21 | +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:38.000000000 +1000 | ||
22 | @@ -164,9 +164,10 @@ extern unsigned long weighted_cpuload(co | ||
23 | #define SCHED_FIFO 1 | ||
24 | #define SCHED_RR 2 | ||
25 | #define SCHED_BATCH 3 | ||
26 | +#define SCHED_ISO 4 | ||
27 | |||
28 | #define SCHED_MIN 0 | ||
29 | -#define SCHED_MAX 3 | ||
30 | +#define SCHED_MAX 4 | ||
31 | |||
32 | #define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) | ||
33 | #define SCHED_RT(policy) ((policy) == SCHED_FIFO || \ | ||
34 | @@ -209,7 +210,7 @@ extern void show_stack(struct task_struc | ||
35 | |||
36 | void io_schedule(void); | ||
37 | long io_schedule_timeout(long timeout); | ||
38 | -extern int sched_interactive, sched_compute; | ||
39 | +extern int sched_interactive, sched_compute, sched_iso_cpu; | ||
40 | |||
41 | extern void cpu_init (void); | ||
42 | extern void trap_init(void); | ||
43 | @@ -489,12 +490,14 @@ struct signal_struct { | ||
44 | |||
45 | #define MAX_USER_RT_PRIO 100 | ||
46 | #define MAX_RT_PRIO MAX_USER_RT_PRIO | ||
47 | +#define ISO_PRIO (MAX_RT_PRIO - 1) | ||
48 | |||
49 | #define MAX_PRIO (MAX_RT_PRIO + 40) | ||
50 | #define MIN_USER_PRIO (MAX_PRIO - 1) | ||
51 | |||
52 | -#define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) | ||
53 | +#define rt_task(p) (unlikely(SCHED_RT((p)->policy))) | ||
54 | #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) | ||
55 | +#define iso_task(p) (unlikely((p)->policy == SCHED_ISO)) | ||
56 | |||
57 | /* | ||
58 | * Some day this will be a full-fledged user tracking system.. | ||
59 | @@ -954,6 +957,7 @@ static inline void put_task_struct(struc | ||
60 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | ||
61 | #define PF_NONSLEEP 0x20000000 /* Waiting on in kernel activity */ | ||
62 | #define PF_FORKED 0x40000000 /* Task just forked another process */ | ||
63 | +#define PF_ISOREF 0x80000000 /* SCHED_ISO task has used up quota */ | ||
64 | |||
65 | /* | ||
66 | * Only the _current_ task can read/write to tsk->flags, but other | ||
67 | Index: linux-ck-dev/include/linux/sysctl.h | ||
68 | =================================================================== | ||
69 | --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000 | ||
70 | +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:38.000000000 +1000 | ||
71 | @@ -150,6 +150,7 @@ enum | ||
72 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ | ||
73 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ | ||
74 | KERN_COMPUTE=74, /* adjust timeslices for a compute server */ | ||
75 | + KERN_ISO_CPU=75, /* percent cpu SCHED_ISO tasks run SCHED_RR */ | ||
76 | }; | ||
77 | |||
78 | |||
79 | Index: linux-ck-dev/kernel/sched.c | ||
80 | =================================================================== | ||
81 | --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:35.000000000 +1000 | ||
82 | +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:38.000000000 +1000 | ||
83 | @@ -62,10 +62,14 @@ | ||
84 | * raise its priority. | ||
85 | * sched_compute - sysctl which enables long timeslices and delayed preemption | ||
86 | * for compute server usage. | ||
87 | + * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks | ||
88 | + * are allowed to run (over ISO_PERIOD ticks) as real time tasks. | ||
89 | */ | ||
90 | int sched_interactive __read_mostly = 1; | ||
91 | int sched_compute __read_mostly; | ||
92 | +int sched_iso_cpu __read_mostly = 80; | ||
93 | |||
94 | +#define ISO_PERIOD (5 * HZ) | ||
95 | /* | ||
96 | * CACHE_DELAY is the time preemption is delayed in sched_compute mode | ||
97 | * and is set to a nominal 10ms. | ||
98 | @@ -146,6 +150,9 @@ struct runqueue { | ||
99 | |||
100 | unsigned long long timestamp_last_tick; | ||
101 | unsigned short cache_ticks, preempted; | ||
102 | + unsigned long iso_ticks; | ||
103 | + unsigned short iso_refractory; | ||
104 | + | ||
105 | task_t *curr, *idle; | ||
106 | struct mm_struct *prev_mm; | ||
107 | unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; | ||
108 | @@ -742,6 +749,17 @@ static int effective_prio(const task_t * | ||
109 | if (rt_task(p)) | ||
110 | return p->prio; | ||
111 | |||
112 | + if (iso_task(p)) { | ||
113 | + if (likely(!(p->flags & PF_ISOREF))) | ||
114 | + /* | ||
115 | + * If SCHED_ISO tasks have not used up their real time | ||
116 | + * quota they run at a priority just better than the | ||
117 | + * highest SCHED_NORMAL priority. Otherwise they run as | ||
118 | + * SCHED_NORMAL. | ||
119 | + */ | ||
120 | + return ISO_PRIO; | ||
121 | + } | ||
122 | + | ||
123 | full_slice = slice(p); | ||
124 | if (full_slice > p->slice) | ||
125 | used_slice = full_slice - p->slice; | ||
126 | @@ -2632,6 +2650,22 @@ static void time_slice_expired(task_t *p | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | + * Test if SCHED_ISO tasks have run longer than their allotted period as RT | ||
131 | + * tasks and set the refractory flag if necessary. There is 10% hysteresis | ||
132 | + * for unsetting the flag. | ||
133 | + */ | ||
134 | +static inline unsigned int test_ret_isorefractory(runqueue_t *rq) | ||
135 | +{ | ||
136 | + if (likely(!rq->iso_refractory)) { | ||
137 | + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu) | ||
138 | + rq->iso_refractory = 1; | ||
139 | + } else | ||
140 | + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100)) | ||
141 | + rq->iso_refractory = 0; | ||
142 | + return rq->iso_refractory; | ||
143 | +} | ||
144 | + | ||
145 | +/* | ||
146 | * This function gets called by the timer code, with HZ frequency. | ||
147 | * We call it with interrupts disabled. | ||
148 | */ | ||
149 | @@ -2659,11 +2693,29 @@ void scheduler_tick(void) | ||
150 | set_tsk_need_resched(p); | ||
151 | goto out; | ||
152 | } | ||
153 | - /* SCHED_FIFO tasks never run out of timeslice. */ | ||
154 | - if (unlikely(p->policy == SCHED_FIFO)) | ||
155 | - goto out; | ||
156 | |||
157 | spin_lock(&rq->lock); | ||
158 | + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) && | ||
159 | + p->mm)) { | ||
160 | + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100) | ||
161 | + rq->iso_ticks += 100; | ||
162 | + } else | ||
163 | + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; | ||
164 | + | ||
165 | + if (iso_task(p)) { | ||
166 | + if (unlikely(test_ret_isorefractory(rq))) { | ||
167 | + if (!(p->flags & PF_ISOREF)) { | ||
168 | + set_tsk_need_resched(p); | ||
169 | + p->flags |= PF_ISOREF; | ||
170 | + } | ||
171 | + } else | ||
172 | + p->flags &= ~PF_ISOREF; | ||
173 | + } else | ||
174 | + /* SCHED_FIFO tasks never run out of timeslice. */ | ||
175 | + if (unlikely(p->policy == SCHED_FIFO)) | ||
176 | + goto out_unlock; | ||
177 | + | ||
178 | + | ||
179 | debit = ns_diff(rq->timestamp_last_tick, p->timestamp); | ||
180 | p->ns_debit += debit; | ||
181 | if (p->ns_debit < NSJIFFY) | ||
182 | @@ -2758,7 +2810,7 @@ static int dependent_sleeper(int this_cp | ||
183 | int ret = 0, i; | ||
184 | |||
185 | /* kernel/rt threads do not participate in dependent sleeping */ | ||
186 | - if (!p->mm || rt_task(p)) | ||
187 | + if (!p->mm || rt_task(p) || iso_task(p)) | ||
188 | return 0; | ||
189 | |||
190 | for_each_domain(this_cpu, tmp) { | ||
191 | @@ -2795,7 +2847,7 @@ static int dependent_sleeper(int this_cp | ||
192 | * task from using an unfair proportion of the | ||
193 | * physical cpu's resources. -ck | ||
194 | */ | ||
195 | - if (rt_task(smt_curr)) { | ||
196 | + if (rt_task(smt_curr) || iso_task(smt_curr)) { | ||
197 | /* | ||
198 | * With real time tasks we run non-rt tasks only | ||
199 | * per_cpu_gain% of the time. | ||
200 | @@ -3567,9 +3619,19 @@ int sched_setscheduler(struct task_struc | ||
201 | { | ||
202 | int retval; | ||
203 | int queued, oldprio, oldpolicy = -1; | ||
204 | + struct sched_param zero_param = { .sched_priority = 0 }; | ||
205 | unsigned long flags; | ||
206 | runqueue_t *rq; | ||
207 | |||
208 | + if (SCHED_RT(policy) && !capable(CAP_SYS_NICE)) { | ||
209 | + /* | ||
210 | + * If the caller requested an RT policy without having the | ||
211 | + * necessary rights, we downgrade the policy to SCHED_ISO. | ||
212 | + * We also set the parameter to zero to pass the checks. | ||
213 | + */ | ||
214 | + policy = SCHED_ISO; | ||
215 | + param = &zero_param; | ||
216 | + } | ||
217 | recheck: | ||
218 | /* double check policy once rq lock held */ | ||
219 | if (policy < 0) | ||
220 | @@ -4063,6 +4125,7 @@ asmlinkage long sys_sched_get_priority_m | ||
221 | break; | ||
222 | case SCHED_NORMAL: | ||
223 | case SCHED_BATCH: | ||
224 | + case SCHED_ISO: | ||
225 | ret = 0; | ||
226 | break; | ||
227 | } | ||
228 | @@ -4087,6 +4150,7 @@ asmlinkage long sys_sched_get_priority_m | ||
229 | break; | ||
230 | case SCHED_NORMAL: | ||
231 | case SCHED_BATCH: | ||
232 | + case SCHED_ISO: | ||
233 | ret = 0; | ||
234 | } | ||
235 | return ret; | ||
236 | @@ -5992,7 +6056,8 @@ void __init sched_init(void) | ||
237 | |||
238 | rq = cpu_rq(i); | ||
239 | spin_lock_init(&rq->lock); | ||
240 | - rq->nr_running = rq->cache_ticks = rq->preempted = 0; | ||
241 | + rq->nr_running = rq->cache_ticks = rq->preempted = | ||
242 | + rq->iso_ticks = 0; | ||
243 | |||
244 | #ifdef CONFIG_SMP | ||
245 | rq->sd = NULL; | ||
246 | Index: linux-ck-dev/kernel/sysctl.c | ||
247 | =================================================================== | ||
248 | --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000 | ||
249 | +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:38.000000000 +1000 | ||
250 | @@ -229,6 +229,11 @@ static ctl_table root_table[] = { | ||
251 | { .ctl_name = 0 } | ||
252 | }; | ||
253 | |||
254 | +/* Constants for minimum and maximum testing. | ||
255 | + We use these as one-element integer vectors. */ | ||
256 | +static int zero; | ||
257 | +static int one_hundred = 100; | ||
258 | + | ||
259 | static ctl_table kern_table[] = { | ||
260 | { | ||
261 | .ctl_name = KERN_OSTYPE, | ||
262 | @@ -639,6 +644,17 @@ static ctl_table kern_table[] = { | ||
263 | .mode = 0644, | ||
264 | .proc_handler = &proc_dointvec, | ||
265 | }, | ||
266 | + { | ||
267 | + .ctl_name = KERN_ISO_CPU, | ||
268 | + .procname = "iso_cpu", | ||
269 | + .data = &sched_iso_cpu, | ||
270 | + .maxlen = sizeof (int), | ||
271 | + .mode = 0644, | ||
272 | + .proc_handler = &proc_dointvec_minmax, | ||
273 | + .strategy = &sysctl_intvec, | ||
274 | + .extra1 = &zero, | ||
275 | + .extra2 = &one_hundred, | ||
276 | + }, | ||
277 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
278 | { | ||
279 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, | ||
280 | @@ -702,12 +718,6 @@ static ctl_table kern_table[] = { | ||
281 | { .ctl_name = 0 } | ||
282 | }; | ||
283 | |||
284 | -/* Constants for minimum and maximum testing in vm_table. | ||
285 | - We use these as one-element integer vectors. */ | ||
286 | -static int zero; | ||
287 | -static int one_hundred = 100; | ||
288 | - | ||
289 | - | ||
290 | static ctl_table vm_table[] = { | ||
291 | { | ||
292 | .ctl_name = VM_OVERCOMMIT_MEMORY, | ||
293 | Index: linux-ck-dev/Documentation/sysctl/kernel.txt | ||
294 | =================================================================== | ||
295 | --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000 | ||
296 | +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:38.000000000 +1000 | ||
297 | @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: | ||
298 | - hostname | ||
299 | - hotplug | ||
300 | - interactive | ||
301 | +- iso_cpu | ||
302 | - java-appletviewer [ binfmt_java, obsolete ] | ||
303 | - java-interpreter [ binfmt_java, obsolete ] | ||
304 | - l2cr [ PPC only ] | ||
305 | @@ -182,6 +183,14 @@ are obeyed if this tunable is disabled. | ||
306 | |||
307 | ============================================================== | ||
308 | |||
309 | +iso_cpu: | ||
310 | + | ||
311 | +This sets the percentage cpu that the unprivileged SCHED_ISO tasks can | ||
312 | +run effectively at realtime priority, averaged over a rolling 5 seconds. | ||
313 | +Set to 80% by default. | ||
314 | + | ||
315 | +============================================================== | ||
316 | + | ||
317 | l2cr: (PPC only) | ||
318 | |||
319 | This flag controls the L2 cache of G3 processor boards. If |