Magellan Linux

Annotation of /trunk/kernel26-alx/patches-2.6.17-r6/0007-2.6.17-sched-iso-4.5.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Fri May 18 11:04:36 2007 UTC (17 years ago) by niro
File size: 10402 byte(s)
-import

1 niro 199 Add the SCHED_ISO policy (isochronous) which is a starvation free soft
2     realtime policy available to unprivileged users. The amount of cpu that
3     SCHED_ISO tasks will run as realtime is configurable by the tunable in
4    
5     /proc/sys/kernel/iso_cpu
6    
7     and is set to 80% (over 5 seconds) by default.
8    
9     Signed-off-by: Con Kolivas <kernel@kolivas.org>
10    
11     Documentation/sysctl/kernel.txt | 9 ++++
12     include/linux/sched.h | 10 +++--
13     include/linux/sysctl.h | 1
14     kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++----
15     kernel/sysctl.c | 22 ++++++++---
16     5 files changed, 104 insertions(+), 15 deletions(-)
17    
18     Index: linux-ck-dev/include/linux/sched.h
19     ===================================================================
20     --- linux-ck-dev.orig/include/linux/sched.h 2006-06-18 15:23:35.000000000 +1000
21     +++ linux-ck-dev/include/linux/sched.h 2006-06-18 15:23:38.000000000 +1000
22     @@ -164,9 +164,10 @@ extern unsigned long weighted_cpuload(co
23     #define SCHED_FIFO 1
24     #define SCHED_RR 2
25     #define SCHED_BATCH 3
26     +#define SCHED_ISO 4
27    
28     #define SCHED_MIN 0
29     -#define SCHED_MAX 3
30     +#define SCHED_MAX 4
31    
32     #define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)
33     #define SCHED_RT(policy) ((policy) == SCHED_FIFO || \
34     @@ -209,7 +210,7 @@ extern void show_stack(struct task_struc
35    
36     void io_schedule(void);
37     long io_schedule_timeout(long timeout);
38     -extern int sched_interactive, sched_compute;
39     +extern int sched_interactive, sched_compute, sched_iso_cpu;
40    
41     extern void cpu_init (void);
42     extern void trap_init(void);
43     @@ -489,12 +490,14 @@ struct signal_struct {
44    
45     #define MAX_USER_RT_PRIO 100
46     #define MAX_RT_PRIO MAX_USER_RT_PRIO
47     +#define ISO_PRIO (MAX_RT_PRIO - 1)
48    
49     #define MAX_PRIO (MAX_RT_PRIO + 40)
50     #define MIN_USER_PRIO (MAX_PRIO - 1)
51    
52     -#define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO))
53     +#define rt_task(p) (unlikely(SCHED_RT((p)->policy)))
54     #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH))
55     +#define iso_task(p) (unlikely((p)->policy == SCHED_ISO))
56    
57     /*
58     * Some day this will be a full-fledged user tracking system..
59     @@ -954,6 +957,7 @@ static inline void put_task_struct(struc
60     #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
61     #define PF_NONSLEEP 0x20000000 /* Waiting on in kernel activity */
62     #define PF_FORKED 0x40000000 /* Task just forked another process */
63     +#define PF_ISOREF 0x80000000 /* SCHED_ISO task has used up quota */
64    
65     /*
66     * Only the _current_ task can read/write to tsk->flags, but other
67     Index: linux-ck-dev/include/linux/sysctl.h
68     ===================================================================
69     --- linux-ck-dev.orig/include/linux/sysctl.h 2006-06-18 15:23:21.000000000 +1000
70     +++ linux-ck-dev/include/linux/sysctl.h 2006-06-18 15:23:38.000000000 +1000
71     @@ -150,6 +150,7 @@ enum
72     KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
73     KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */
74     KERN_COMPUTE=74, /* adjust timeslices for a compute server */
75     + KERN_ISO_CPU=75, /* percent cpu SCHED_ISO tasks run SCHED_RR */
76     };
77    
78    
79     Index: linux-ck-dev/kernel/sched.c
80     ===================================================================
81     --- linux-ck-dev.orig/kernel/sched.c 2006-06-18 15:23:35.000000000 +1000
82     +++ linux-ck-dev/kernel/sched.c 2006-06-18 15:23:38.000000000 +1000
83     @@ -62,10 +62,14 @@
84     * raise its priority.
85     * sched_compute - sysctl which enables long timeslices and delayed preemption
86     * for compute server usage.
87     + * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks
88     + * are allowed to run (over ISO_PERIOD timer ticks) as real time tasks.
89     */
90     int sched_interactive __read_mostly = 1;
91     int sched_compute __read_mostly;
92     +int sched_iso_cpu __read_mostly = 80;
93    
94     +#define ISO_PERIOD (5 * HZ)
95     /*
96     * CACHE_DELAY is the time preemption is delayed in sched_compute mode
97     * and is set to a nominal 10ms.
98     @@ -146,6 +150,9 @@ struct runqueue {
99    
100     unsigned long long timestamp_last_tick;
101     unsigned short cache_ticks, preempted;
102     + unsigned long iso_ticks;
103     + unsigned short iso_refractory;
104     +
105     task_t *curr, *idle;
106     struct mm_struct *prev_mm;
107     unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
108     @@ -742,6 +749,17 @@ static int effective_prio(const task_t *
109     if (rt_task(p))
110     return p->prio;
111    
112     + if (iso_task(p)) {
113     + if (likely(!(p->flags & PF_ISOREF)))
114     + /*
115     + * If SCHED_ISO tasks have not used up their real time
116     + * quota they run just better than highest
117     + * SCHED_NORMAL priority. Otherwise they run as
118     + * SCHED_NORMAL.
119     + */
120     + return ISO_PRIO;
121     + }
122     +
123     full_slice = slice(p);
124     if (full_slice > p->slice)
125     used_slice = full_slice - p->slice;
126     @@ -2632,6 +2650,22 @@ static void time_slice_expired(task_t *p
127     }
128    
129     /*
130     + * Test if SCHED_ISO tasks have run longer than their allotted period as RT
131     + * tasks and set the refractory flag if necessary. There is 10% hysteresis
132     + * for unsetting the flag.
133     + */
134     +static inline unsigned int test_ret_isorefractory(runqueue_t *rq)
135     +{
136     + if (likely(!rq->iso_refractory)) {
137     + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu)
138     + rq->iso_refractory = 1;
139     + } else
140     + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100))
141     + rq->iso_refractory = 0;
142     + return rq->iso_refractory;
143     +}
144     +
145     +/*
146     * This function gets called by the timer code, with HZ frequency.
147     * We call it with interrupts disabled.
148     */
149     @@ -2659,11 +2693,29 @@ void scheduler_tick(void)
150     set_tsk_need_resched(p);
151     goto out;
152     }
153     - /* SCHED_FIFO tasks never run out of timeslice. */
154     - if (unlikely(p->policy == SCHED_FIFO))
155     - goto out;
156    
157     spin_lock(&rq->lock);
158     + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) &&
159     + p->mm)) {
160     + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100)
161     + rq->iso_ticks += 100;
162     + } else
163     + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
164     +
165     + if (iso_task(p)) {
166     + if (unlikely(test_ret_isorefractory(rq))) {
167     + if (!(p->flags & PF_ISOREF)) {
168     + set_tsk_need_resched(p);
169     + p->flags |= PF_ISOREF;
170     + }
171     + } else
172     + p->flags &= ~PF_ISOREF;
173     + } else
174     + /* SCHED_FIFO tasks never run out of timeslice. */
175     + if (unlikely(p->policy == SCHED_FIFO))
176     + goto out_unlock;
177     +
178     +
179     debit = ns_diff(rq->timestamp_last_tick, p->timestamp);
180     p->ns_debit += debit;
181     if (p->ns_debit < NSJIFFY)
182     @@ -2758,7 +2810,7 @@ static int dependent_sleeper(int this_cp
183     int ret = 0, i;
184    
185     /* kernel/rt threads do not participate in dependent sleeping */
186     - if (!p->mm || rt_task(p))
187     + if (!p->mm || rt_task(p) || iso_task(p))
188     return 0;
189    
190     for_each_domain(this_cpu, tmp) {
191     @@ -2795,7 +2847,7 @@ static int dependent_sleeper(int this_cp
192     * task from using an unfair proportion of the
193     * physical cpu's resources. -ck
194     */
195     - if (rt_task(smt_curr)) {
196     + if (rt_task(smt_curr) || iso_task(smt_curr)) {
197     /*
198     * With real time tasks we run non-rt tasks only
199     * per_cpu_gain% of the time.
200     @@ -3567,9 +3619,19 @@ int sched_setscheduler(struct task_struc
201     {
202     int retval;
203     int queued, oldprio, oldpolicy = -1;
204     + struct sched_param zero_param = { .sched_priority = 0 };
205     unsigned long flags;
206     runqueue_t *rq;
207    
208     + if (SCHED_RT(policy) && !capable(CAP_SYS_NICE)) {
209     + /*
210     + * If the caller requested an RT policy without having the
211     + * necessary rights, we downgrade the policy to SCHED_ISO.
212     + * We also set the parameter to zero to pass the checks.
213     + */
214     + policy = SCHED_ISO;
215     + param = &zero_param;
216     + }
217     recheck:
218     /* double check policy once rq lock held */
219     if (policy < 0)
220     @@ -4063,6 +4125,7 @@ asmlinkage long sys_sched_get_priority_m
221     break;
222     case SCHED_NORMAL:
223     case SCHED_BATCH:
224     + case SCHED_ISO:
225     ret = 0;
226     break;
227     }
228     @@ -4087,6 +4150,7 @@ asmlinkage long sys_sched_get_priority_m
229     break;
230     case SCHED_NORMAL:
231     case SCHED_BATCH:
232     + case SCHED_ISO:
233     ret = 0;
234     }
235     return ret;
236     @@ -5992,7 +6056,8 @@ void __init sched_init(void)
237    
238     rq = cpu_rq(i);
239     spin_lock_init(&rq->lock);
240     - rq->nr_running = rq->cache_ticks = rq->preempted = 0;
241     + rq->nr_running = rq->cache_ticks = rq->preempted =
242     + rq->iso_ticks = 0;
243    
244     #ifdef CONFIG_SMP
245     rq->sd = NULL;
246     Index: linux-ck-dev/kernel/sysctl.c
247     ===================================================================
248     --- linux-ck-dev.orig/kernel/sysctl.c 2006-06-18 15:23:21.000000000 +1000
249     +++ linux-ck-dev/kernel/sysctl.c 2006-06-18 15:23:38.000000000 +1000
250     @@ -229,6 +229,11 @@ static ctl_table root_table[] = {
251     { .ctl_name = 0 }
252     };
253    
254     +/* Constants for minimum and maximum testing.
255     + We use these as one-element integer vectors. */
256     +static int zero;
257     +static int one_hundred = 100;
258     +
259     static ctl_table kern_table[] = {
260     {
261     .ctl_name = KERN_OSTYPE,
262     @@ -639,6 +644,17 @@ static ctl_table kern_table[] = {
263     .mode = 0644,
264     .proc_handler = &proc_dointvec,
265     },
266     + {
267     + .ctl_name = KERN_ISO_CPU,
268     + .procname = "iso_cpu",
269     + .data = &sched_iso_cpu,
270     + .maxlen = sizeof (int),
271     + .mode = 0644,
272     + .proc_handler = &proc_dointvec_minmax,
273     + .strategy = &sysctl_intvec,
274     + .extra1 = &zero,
275     + .extra2 = &one_hundred,
276     + },
277     #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
278     {
279     .ctl_name = KERN_UNKNOWN_NMI_PANIC,
280     @@ -702,12 +718,6 @@ static ctl_table kern_table[] = {
281     { .ctl_name = 0 }
282     };
283    
284     -/* Constants for minimum and maximum testing in vm_table.
285     - We use these as one-element integer vectors. */
286     -static int zero;
287     -static int one_hundred = 100;
288     -
289     -
290     static ctl_table vm_table[] = {
291     {
292     .ctl_name = VM_OVERCOMMIT_MEMORY,
293     Index: linux-ck-dev/Documentation/sysctl/kernel.txt
294     ===================================================================
295     --- linux-ck-dev.orig/Documentation/sysctl/kernel.txt 2006-06-18 15:23:21.000000000 +1000
296     +++ linux-ck-dev/Documentation/sysctl/kernel.txt 2006-06-18 15:23:38.000000000 +1000
297     @@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
298     - hostname
299     - hotplug
300     - interactive
301     +- iso_cpu
302     - java-appletviewer [ binfmt_java, obsolete ]
303     - java-interpreter [ binfmt_java, obsolete ]
304     - l2cr [ PPC only ]
305     @@ -182,6 +183,14 @@ are obeyed if this tunable is disabled.
306    
307     ==============================================================
308    
309     +iso_cpu:
310     +
311     +This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
312     +run effectively at realtime priority, averaged over a rolling 5 seconds.
313     +Set to 80% by default.
314     +
315     +==============================================================
316     +
317     l2cr: (PPC only)
318    
319     This flag controls the L2 cache of G3 processor boards. If