Magellan Linux

Contents of /trunk/kernel26-alx/patches-2.6.20-r6/0005-2.6.20-sched-iso-4.7.patch



Revision 1175 - Thu Oct 14 12:15:46 2010 UTC by niro
File size: 10451 byte(s)
-2.6.20-alx-r6 new magellan 0.5.2 kernel
Add the SCHED_ISO policy (isochronous), a starvation-free soft
realtime policy available to unprivileged users. The amount of cpu that
SCHED_ISO tasks may run as realtime is configurable via the tunable in

/proc/sys/kernel/iso_cpu

and is set to 80% (over 5 seconds) by default.
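
For illustration only (not part of this patch): a minimal user-space sketch of
how a task could request SCHED_ISO on a kernel carrying this change. The
SCHED_ISO value of 4 and the zero sched_priority follow the sched.h and
sched_setscheduler() hunks below; the fallback #define is needed because glibc
headers do not know this policy.

/* Hypothetical example: switch the calling task to SCHED_ISO. */
#include <sched.h>
#include <stdio.h>

#ifndef SCHED_ISO
#define SCHED_ISO 4	/* value added to include/linux/sched.h by this patch */
#endif

int main(void)
{
	/* SCHED_ISO is not a real time policy, so sched_priority must be 0. */
	struct sched_param sp = { .sched_priority = 0 };

	if (sched_setscheduler(0, SCHED_ISO, &sp) == -1) {
		/* Fails with EINVAL on kernels without this patch. */
		perror("sched_setscheduler(SCHED_ISO)");
		return 1;
	}
	/* Latency-sensitive work now runs at ISO_PRIO until SCHED_ISO cpu
	 * usage exceeds the iso_cpu quota, then as SCHED_NORMAL. */
	return 0;
}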

Signed-off-by: Con Kolivas <kernel@kolivas.org>

 Documentation/sysctl/kernel.txt |    9 ++++
 include/linux/sched.h           |   10 +++--
 kernel/sched.c                  |   77 ++++++++++++++++++++++++++++++++++++----
 kernel/sysctl.c                 |   25 +++++++++---
 4 files changed, 106 insertions(+), 15 deletions(-)

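An illustrative simulation (not part of the patch; HZ = 1000 is an assumption)
of the per-runqueue accounting added in the kernel/sched.c hunks below: while a
SCHED_ISO task monopolises a cpu, iso_ticks climbs by 100 per tick until the
iso_cpu quota trips the refractory flag, after which the ticks decay and the
10% hysteresis makes the flag oscillate around the limit.

#include <stdio.h>

#define HZ 1000			/* assumed tick rate for this sketch */
#define ISO_PERIOD (5 * HZ)	/* as in the kernel/sched.c hunk below */

static int sched_iso_cpu = 80;
static unsigned long iso_ticks;
static unsigned int iso_refractory;

/* Mirrors test_ret_isorefractory(): 10% hysteresis when clearing the flag. */
static unsigned int test_refractory(void)
{
	if (!iso_refractory) {
		if (iso_ticks / ISO_PERIOD > sched_iso_cpu)
			iso_refractory = 1;
	} else {
		if (iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100))
			iso_refractory = 0;
	}
	return iso_refractory;
}

int main(void)
{
	unsigned long tick;

	for (tick = 1; tick <= 10 * HZ; tick++) {
		if (!iso_refractory) {
			/* ISO task running at ISO_PRIO accumulates ticks. */
			if (iso_ticks <= (ISO_PERIOD * 100) - 100)
				iso_ticks += 100;
		} else {
			/* Demoted to SCHED_NORMAL: ticks decay each tick. */
			iso_ticks = iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
		}
		test_refractory();
		if (tick % HZ == 0)
			printf("t=%lus cpu=%lu%% refractory=%u\n",
			       tick / HZ, iso_ticks / ISO_PERIOD, iso_refractory);
	}
	return 0;
}
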
Index: linux-2.6.20-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
+++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:31.000000000 +1100
@@ -34,10 +34,11 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
+#define SCHED_ISO 4

#ifdef __KERNEL__

-#define SCHED_MAX SCHED_BATCH
+#define SCHED_MAX SCHED_ISO
#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX)

struct sched_param {
@@ -219,7 +220,7 @@ extern void show_stack(struct task_struc

void io_schedule(void);
long io_schedule_timeout(long timeout);
-extern int sched_interactive, sched_compute;
+extern int sched_interactive, sched_compute, sched_iso_cpu;

extern void cpu_init (void);
extern void trap_init(void);
@@ -526,16 +527,18 @@ struct signal_struct {

#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
+#define ISO_PRIO (MAX_RT_PRIO - 1)

#define MAX_PRIO (MAX_RT_PRIO + 40)
#define MIN_USER_PRIO (MAX_PRIO - 1)

-#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
+#define rt_prio(prio) unlikely((prio) < ISO_PRIO)
#define rt_task(p) rt_prio((p)->prio)
#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH))
#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \
(policy) == SCHED_RR)
#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy))
+#define iso_task(p) (unlikely((p)->policy == SCHED_ISO))

/*
* Some day this will be a full-fledged user tracking system..
@@ -1151,6 +1154,7 @@ static inline void put_task_struct(struc
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
+#define PF_ISOREF 0x04000000 /* SCHED_ISO task has used up quota */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_NONSLEEP 0x40000000 /* Waiting on in kernel activity */
Index: linux-2.6.20-ck1/kernel/sched.c
===================================================================
--- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
+++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:31.000000000 +1100
@@ -65,10 +65,14 @@
* raise its priority.
* sched_compute - sysctl which enables long timeslices and delayed preemption
* for compute server usage.
+ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks
+ * are allowed to run (over ISO_PERIOD seconds) as real time tasks.
*/
int sched_interactive __read_mostly = 1;
int sched_compute __read_mostly;
+int sched_iso_cpu __read_mostly = 80;

+#define ISO_PERIOD (5 * HZ)
/*
* CACHE_DELAY is the time preemption is delayed in sched_compute mode
* and is set to a nominal 10ms.
@@ -143,6 +147,8 @@ struct rq {
/* Cached timestamp set by update_cpu_clock() */
unsigned long long most_recent_timestamp;
unsigned short cache_ticks, preempted;
+ unsigned long iso_ticks;
+ unsigned short iso_refractory;
struct task_struct *curr, *idle;
unsigned long next_balance;
struct mm_struct *prev_mm;
@@ -878,6 +884,17 @@ static inline int __normal_prio(struct t
unsigned int full_slice, used_slice = 0;
unsigned int best_bonus, rr;

+ if (iso_task(p)) {
+ if (likely(!(p->flags & PF_ISOREF)))
+ /*
+ * If SCHED_ISO tasks have not used up their real time
+ * quota they run at just better than the highest
+ * SCHED_NORMAL priority. Otherwise they run as
+ * SCHED_NORMAL.
+ */
+ return ISO_PRIO;
+ }
+
full_slice = slice(p);
if (full_slice > p->slice)
used_slice = full_slice - p->slice;
@@ -2990,6 +3007,23 @@ static void time_slice_expired(struct ta
requeue_task(p, rq, effective_prio(p));
}

+/*
+ * Test if SCHED_ISO tasks have run longer than their allotted period as RT
+ * tasks and set the refractory flag if necessary. There is 10% hysteresis
+ * for unsetting the flag.
+ */
+static inline unsigned int test_ret_isorefractory(struct rq *rq)
+{
+ if (likely(!rq->iso_refractory)) {
+ if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu)
+ rq->iso_refractory = 1;
+ } else {
+ if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100))
+ rq->iso_refractory = 0;
+ }
+ return rq->iso_refractory;
+}
+
static void task_running_tick(struct rq *rq, struct task_struct *p)
{
unsigned long debit;
@@ -2999,11 +3033,29 @@ static void task_running_tick(struct rq
set_tsk_need_resched(p);
return;
}
- /* SCHED_FIFO tasks never run out of timeslice. */
- if (unlikely(p->policy == SCHED_FIFO))
- return;

spin_lock(&rq->lock);
+ if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) &&
+ p->mm)) {
+ if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100)
+ rq->iso_ticks += 100;
+ } else
+ rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD;
+
+ if (iso_task(p)) {
+ if (unlikely(test_ret_isorefractory(rq))) {
+ if (!(p->flags & PF_ISOREF)) {
+ set_tsk_need_resched(p);
+ p->flags |= PF_ISOREF;
+ }
+ } else
+ p->flags &= ~PF_ISOREF;
+ } else {
+ /* SCHED_FIFO tasks never run out of timeslice. */
+ if (unlikely(p->policy == SCHED_FIFO))
+ goto out_unlock;
+ }
+
debit = ns_diff(rq->most_recent_timestamp, p->timestamp);
p->ns_debit += debit;
if (p->ns_debit < NSJIFFY)
@@ -3122,7 +3174,7 @@ dependent_sleeper(int this_cpu, struct r
int ret = 0, i;

/* kernel/rt threads do not participate in dependent sleeping */
- if (!p->mm || rt_task(p))
+ if (!p->mm || rt_task(p) || iso_task(p))
return 0;

for_each_domain(this_cpu, tmp) {
@@ -3159,7 +3211,7 @@ dependent_sleeper(int this_cpu, struct r
* task from using an unfair proportion of the
* physical cpu's resources. -ck
*/
- if (rt_task(smt_curr)) {
+ if (rt_task(smt_curr) || iso_task(smt_curr)) {
/*
* With real time tasks we run non-rt tasks only
* per_cpu_gain% of the time.
@@ -3971,12 +4023,22 @@ static void __setscheduler(struct task_s
int sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param)
{
+ struct sched_param zero_param = { .sched_priority = 0 };
int queued, retval, oldprio, oldpolicy = -1;
unsigned long flags;
struct rq *rq;

/* may grab non-irq protected spin_locks */
BUG_ON(in_interrupt());
+ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) {
+ /*
+ * If the caller requested an RT policy without having the
+ * necessary rights, we downgrade the policy to SCHED_ISO.
+ * We also set the parameter to zero to pass the checks.
+ */
+ policy = SCHED_ISO;
+ param = &zero_param;
+ }
recheck:
/* double check policy once rq lock held */
if (policy < 0)
@@ -4501,6 +4563,7 @@ asmlinkage long sys_sched_get_priority_m
break;
case SCHED_NORMAL:
case SCHED_BATCH:
+ case SCHED_ISO:
ret = 0;
break;
}
@@ -4525,6 +4588,7 @@ asmlinkage long sys_sched_get_priority_m
break;
case SCHED_NORMAL:
case SCHED_BATCH:
+ case SCHED_ISO:
ret = 0;
}
return ret;
@@ -6647,7 +6711,8 @@ void __init sched_init(void)
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
lockdep_set_class(&rq->lock, &rq->rq_lock_key);
- rq->nr_running = rq->cache_ticks = rq->preempted = 0;
+ rq->nr_running = rq->cache_ticks = rq->preempted =
+ rq->iso_ticks = 0;

#ifdef CONFIG_SMP
rq->sd = NULL;
Index: linux-2.6.20-ck1/kernel/sysctl.c
===================================================================
--- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
+++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:31.000000000 +1100
@@ -273,6 +273,14 @@ static ctl_table root_table[] = {
{ .ctl_name = 0 }
};

+
+/*
+ * Constants for minimum and maximum testing.
+ * We use these as one-element integer vectors.
+ */
+static int zero;
+static int one_hundred = 100;
+
static ctl_table kern_table[] = {
{
.ctl_name = KERN_OSTYPE,
@@ -692,6 +700,17 @@ static ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "iso_cpu",
+ .data = &sched_iso_cpu,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.ctl_name = KERN_UNKNOWN_NMI_PANIC,
@@ -800,12 +819,6 @@ static ctl_table kern_table[] = {
{ .ctl_name = 0 }
};

-/* Constants for minimum and maximum testing in vm_table.
- We use these as one-element integer vectors. */
-static int zero;
-static int one_hundred = 100;
-
-
static ctl_table vm_table[] = {
{
.ctl_name = VM_OVERCOMMIT_MEMORY,
Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt
===================================================================
--- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
+++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:31.000000000 +1100
@@ -27,6 +27,7 @@ show up in /proc/sys/kernel:
- hostname
- hotplug
- interactive
+- iso_cpu
- java-appletviewer [ binfmt_java, obsolete ]
- java-interpreter [ binfmt_java, obsolete ]
- kstack_depth_to_print [ X86 only ]
@@ -185,6 +186,14 @@ are obeyed if this tunable is disabled.

==============================================================

+iso_cpu:
+
+This sets the percentage of cpu that unprivileged SCHED_ISO tasks can
+run effectively at realtime priority, averaged over a rolling 5 seconds.
+Set to 80% by default.
+
+==============================================================
+
l2cr: (PPC only)

This flag controls the L2 cache of G3 processor boards. If
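
Finally, a small usage sketch for the iso_cpu tunable documented above (again
not part of the patch; it assumes a kernel with the patch applied and must be
run as root because the file is opened read-write):

/* Hypothetical helper: print the current SCHED_ISO cpu limit and lower it
 * to 70%. proc_dointvec_minmax rejects values outside 0-100. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/iso_cpu", "r+");
	int pct;

	if (!f) {
		perror("/proc/sys/kernel/iso_cpu");	/* absent without this patch */
		return 1;
	}
	if (fscanf(f, "%d", &pct) == 1)
		printf("SCHED_ISO cpu limit: %d%%\n", pct);

	rewind(f);
	fprintf(f, "70\n");
	fclose(f);
	return 0;
}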