Magellan Linux

Annotation of /trunk/kernel26-alx/patches-2.6.20-r5/0005-2.6.20-sched-iso-4.7.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Fri May 18 11:04:36 2007 UTC (17 years ago) by niro
File size: 10451 byte(s)
-import

1 niro 199 Add the SCHED_ISO policy (isochronous) which is a starvation free soft
2     realtime policy available to unprivileged users. The amount of cpu that
3     SCHED_ISO tasks will run as realtime is configurable by the tunable in
4    
5     /proc/sys/kernel/iso_cpu
6    
7     and is set to 80% (over 5 seconds) by default.
8    
9     Signed-off-by: Con Kolivas <kernel@kolivas.org>
10    
11     Documentation/sysctl/kernel.txt | 9 ++++
12     include/linux/sched.h | 10 +++--
13     kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++----
14     kernel/sysctl.c | 25 +++++++++---
15     4 files changed, 106 insertions(+), 15 deletions(-)
16    
17     Index: linux-2.6.20-ck1/include/linux/sched.h
18     ===================================================================
19     --- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
20     +++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:31.000000000 +1100
21     @@ -34,10 +34,11 @@
22     #define SCHED_FIFO 1
23     #define SCHED_RR 2
24     #define SCHED_BATCH 3
25     +#define SCHED_ISO 4
26    
27     #ifdef __KERNEL__
28    
29     -#define SCHED_MAX SCHED_BATCH
30     +#define SCHED_MAX SCHED_ISO
31     #define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)
32    
33     struct sched_param {
34     @@ -219,7 +220,7 @@ extern void show_stack(struct task_struc
35    
36     void io_schedule(void);
37     long io_schedule_timeout(long timeout);
38     -extern int sched_interactive, sched_compute;
39     +extern int sched_interactive, sched_compute, sched_iso_cpu;
40    
41     extern void cpu_init (void);
42     extern void trap_init(void);
43     @@ -526,16 +527,18 @@ struct signal_struct {
44    
45     #define MAX_USER_RT_PRIO 100
46     #define MAX_RT_PRIO MAX_USER_RT_PRIO
47     +#define ISO_PRIO (MAX_RT_PRIO - 1)
48    
49     #define MAX_PRIO (MAX_RT_PRIO + 40)
50     #define MIN_USER_PRIO (MAX_PRIO - 1)
51    
52     -#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
53     +#define rt_prio(prio) unlikely((prio) < ISO_PRIO)
54     #define rt_task(p) rt_prio((p)->prio)
55     #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH))
56     #define is_rt_policy(policy) ((policy) == SCHED_FIFO || \
57     (policy) == SCHED_RR)
58     #define has_rt_policy(p) unlikely(is_rt_policy((p)->policy))
59     +#define iso_task(p) (unlikely((p)->policy == SCHED_ISO))
60    
61     /*
62     * Some day this will be a full-fledged user tracking system..
63     @@ -1151,6 +1154,7 @@ static inline void put_task_struct(struc
64     #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
65     #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
66     #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
67     +#define PF_ISOREF 0x04000000 /* SCHED_ISO task has used up quota */
68     #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
69     #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
70     #define PF_NONSLEEP 0x40000000 /* Waiting on in kernel activity */
71     Index: linux-2.6.20-ck1/kernel/sched.c
72     ===================================================================
73     --- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
74     +++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:31.000000000 +1100
75     @@ -65,10 +65,14 @@
76     * raise its priority.
77     * sched_compute - sysctl which enables long timeslices and delayed preemption
78     * for compute server usage.
79     + * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks
80     + * are allowed to run (over ISO_PERIOD seconds) as real time tasks.
81     */
82     int sched_interactive __read_mostly = 1;
83     int sched_compute __read_mostly;
84     +int sched_iso_cpu __read_mostly = 80;
85    
86     +#define ISO_PERIOD (5 * HZ)
87     /*
88     * CACHE_DELAY is the time preemption is delayed in sched_compute mode
89     * and is set to a nominal 10ms.
90     @@ -143,6 +147,8 @@ struct rq {
91     /* Cached timestamp set by update_cpu_clock() */
92     unsigned long long most_recent_timestamp;
93     unsigned short cache_ticks, preempted;
94     + unsigned long iso_ticks;
95     + unsigned short iso_refractory;
96     struct task_struct *curr, *idle;
97     unsigned long next_balance;
98     struct mm_struct *prev_mm;
99     @@ -878,6 +884,17 @@ static inline int __normal_prio(struct t
100     unsigned int full_slice, used_slice = 0;
101     unsigned int best_bonus, rr;
102    
103     + if (iso_task(p)) {
104     + if (likely(!(p->flags & PF_ISOREF)))
105     + /*
106     + * If SCHED_ISO tasks have not used up their real time
107     + * quota they run at just better than the highest
108     + * SCHED_NORMAL priority. Otherwise they run as
109     + * SCHED_NORMAL.
110     + */
111     + return ISO_PRIO;
112     + }
113     +
114     full_slice = slice(p);
115     if (full_slice > p->slice)
116     used_slice = full_slice - p->slice;
117     @@ -2990,6 +3007,23 @@ static void time_slice_expired(struct ta
118     requeue_task(p, rq, effective_prio(p));
119     }
120    
121     +/*
122     + * Test if SCHED_ISO tasks have run longer than their allotted period as RT
123     + * tasks and set the refractory flag if necessary. There is 10% hysteresis
124     + * for unsetting the flag.
125     + */
126     +static inline unsigned int test_ret_isorefractory(struct rq *rq)
127     +{
128     + if (likely(!rq->iso_refractory)) {
129     + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu)
130     + rq->iso_refractory = 1;
131     + } else {
132     + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100))
133     + rq->iso_refractory = 0;
134     + }
135     + return rq->iso_refractory;
136     +}
137     +
138     static void task_running_tick(struct rq *rq, struct task_struct *p)
139     {
140     unsigned long debit;
141     @@ -2999,11 +3033,29 @@ static void task_running_tick(struct rq
142     set_tsk_need_resched(p);
143     return;
144     }
145     - /* SCHED_FIFO tasks never run out of timeslice. */
146     - if (unlikely(p->policy == SCHED_FIFO))
147     - return;
148    
149     spin_lock(&rq->lock);
150     + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) &&
151     + p->mm)) {
152     + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100)
153     + rq->iso_ticks += 100;
154     + } else
155     + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
156     +
157     + if (iso_task(p)) {
158     + if (unlikely(test_ret_isorefractory(rq))) {
159     + if (!(p->flags & PF_ISOREF)) {
160     + set_tsk_need_resched(p);
161     + p->flags |= PF_ISOREF;
162     + }
163     + } else
164     + p->flags &= ~PF_ISOREF;
165     + } else {
166     + /* SCHED_FIFO tasks never run out of timeslice. */
167     + if (unlikely(p->policy == SCHED_FIFO))
168     + goto out_unlock;
169     + }
170     +
171     debit = ns_diff(rq->most_recent_timestamp, p->timestamp);
172     p->ns_debit += debit;
173     if (p->ns_debit < NSJIFFY)
174     @@ -3122,7 +3174,7 @@ dependent_sleeper(int this_cpu, struct r
175     int ret = 0, i;
176    
177     /* kernel/rt threads do not participate in dependent sleeping */
178     - if (!p->mm || rt_task(p))
179     + if (!p->mm || rt_task(p) || iso_task(p))
180     return 0;
181    
182     for_each_domain(this_cpu, tmp) {
183     @@ -3159,7 +3211,7 @@ dependent_sleeper(int this_cpu, struct r
184     * task from using an unfair proportion of the
185     * physical cpu's resources. -ck
186     */
187     - if (rt_task(smt_curr)) {
188     + if (rt_task(smt_curr) || iso_task(smt_curr)) {
189     /*
190     * With real time tasks we run non-rt tasks only
191     * per_cpu_gain% of the time.
192     @@ -3971,12 +4023,22 @@ static void __setscheduler(struct task_s
193     int sched_setscheduler(struct task_struct *p, int policy,
194     struct sched_param *param)
195     {
196     + struct sched_param zero_param = { .sched_priority = 0 };
197     int queued, retval, oldprio, oldpolicy = -1;
198     unsigned long flags;
199     struct rq *rq;
200    
201     /* may grab non-irq protected spin_locks */
202     BUG_ON(in_interrupt());
203     + if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) {
204     + /*
205     + * If the caller requested an RT policy without having the
206     + * necessary rights, we downgrade the policy to SCHED_ISO.
207     + * We also set the parameter to zero to pass the checks.
208     + */
209     + policy = SCHED_ISO;
210     + param = &zero_param;
211     + }
212     recheck:
213     /* double check policy once rq lock held */
214     if (policy < 0)
215     @@ -4501,6 +4563,7 @@ asmlinkage long sys_sched_get_priority_m
216     break;
217     case SCHED_NORMAL:
218     case SCHED_BATCH:
219     + case SCHED_ISO:
220     ret = 0;
221     break;
222     }
223     @@ -4525,6 +4588,7 @@ asmlinkage long sys_sched_get_priority_m
224     break;
225     case SCHED_NORMAL:
226     case SCHED_BATCH:
227     + case SCHED_ISO:
228     ret = 0;
229     }
230     return ret;
231     @@ -6647,7 +6711,8 @@ void __init sched_init(void)
232     rq = cpu_rq(i);
233     spin_lock_init(&rq->lock);
234     lockdep_set_class(&rq->lock, &rq->rq_lock_key);
235     - rq->nr_running = rq->cache_ticks = rq->preempted = 0;
236     + rq->nr_running = rq->cache_ticks = rq->preempted =
237     + rq->iso_ticks = 0;
238    
239     #ifdef CONFIG_SMP
240     rq->sd = NULL;
241     Index: linux-2.6.20-ck1/kernel/sysctl.c
242     ===================================================================
243     --- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
244     +++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:31.000000000 +1100
245     @@ -273,6 +273,14 @@ static ctl_table root_table[] = {
246     { .ctl_name = 0 }
247     };
248    
249     +
250     +/*
251     + * Constants for minimum and maximum testing.
252     + * We use these as one-element integer vectors.
253     + */
254     +static int zero;
255     +static int one_hundred = 100;
256     +
257     static ctl_table kern_table[] = {
258     {
259     .ctl_name = KERN_OSTYPE,
260     @@ -692,6 +700,17 @@ static ctl_table kern_table[] = {
261     .mode = 0644,
262     .proc_handler = &proc_dointvec,
263     },
264     + {
265     + .ctl_name = CTL_UNNUMBERED,
266     + .procname = "iso_cpu",
267     + .data = &sched_iso_cpu,
268     + .maxlen = sizeof (int),
269     + .mode = 0644,
270     + .proc_handler = &proc_dointvec_minmax,
271     + .strategy = &sysctl_intvec,
272     + .extra1 = &zero,
273     + .extra2 = &one_hundred,
274     + },
275     #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
276     {
277     .ctl_name = KERN_UNKNOWN_NMI_PANIC,
278     @@ -800,12 +819,6 @@ static ctl_table kern_table[] = {
279     { .ctl_name = 0 }
280     };
281    
282     -/* Constants for minimum and maximum testing in vm_table.
283     - We use these as one-element integer vectors. */
284     -static int zero;
285     -static int one_hundred = 100;
286     -
287     -
288     static ctl_table vm_table[] = {
289     {
290     .ctl_name = VM_OVERCOMMIT_MEMORY,
291     Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt
292     ===================================================================
293     --- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
294     +++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:31.000000000 +1100
295     @@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
296     - hostname
297     - hotplug
298     - interactive
299     +- iso_cpu
300     - java-appletviewer [ binfmt_java, obsolete ]
301     - java-interpreter [ binfmt_java, obsolete ]
302     - kstack_depth_to_print [ X86 only ]
303     @@ -185,6 +186,14 @@ are obeyed if this tunable is disabled.
304    
305     ==============================================================
306    
307     +iso_cpu:
308     +
309     +This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
310     +run effectively at realtime priority, averaged over a rolling 5 seconds.
311     +Set to 80% by default.
312     +
313     +==============================================================
314     +
315     l2cr: (PPC only)
316    
317     This flag controls the L2 cache of G3 processor boards. If