Contents of /trunk/kernel26-magellan/patches-2.6.16-r10/0010-2.6.16-sched-iso-4.1.patch
Parent Directory | Revision Log
Revision 70 -
(show annotations)
(download)
Thu May 11 19:09:22 2006 UTC (18 years, 4 months ago) by niro
File size: 8380 byte(s)
Thu May 11 19:09:22 2006 UTC (18 years, 4 months ago) by niro
File size: 8380 byte(s)
import
1 | include/linux/sched.h | 9 ++++-- |
2 | include/linux/sysctl.h | 1 |
3 | kernel/sched.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--- |
4 | kernel/sysctl.c | 22 ++++++++++---- |
5 | 4 files changed, 91 insertions(+), 13 deletions(-) |
6 | |
7 | Index: linux-2.6.16-ck1/include/linux/sched.h |
8 | =================================================================== |
9 | --- linux-2.6.16-ck1.orig/include/linux/sched.h 2006-03-20 20:46:49.000000000 +1100 |
10 | +++ linux-2.6.16-ck1/include/linux/sched.h 2006-03-20 20:46:50.000000000 +1100 |
11 | @@ -162,9 +162,10 @@ extern unsigned long weighted_cpuload(co |
12 | #define SCHED_FIFO 1 |
13 | #define SCHED_RR 2 |
14 | #define SCHED_BATCH 3 |
15 | +#define SCHED_ISO 4 |
16 | |
17 | #define SCHED_MIN 0 |
18 | -#define SCHED_MAX 3 |
19 | +#define SCHED_MAX 4 |
20 | |
21 | #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ |
22 | (policy) <= SCHED_MAX) |
23 | @@ -208,7 +209,7 @@ extern void show_stack(struct task_struc |
24 | |
25 | void io_schedule(void); |
26 | long io_schedule_timeout(long timeout); |
27 | -extern int sched_interactive, sched_compute; |
28 | +extern int sched_interactive, sched_compute, sched_iso_cpu; |
29 | |
30 | extern void cpu_init (void); |
31 | extern void trap_init(void); |
32 | @@ -498,7 +499,8 @@ struct signal_struct { |
33 | |
34 | #define MAX_PRIO (MAX_RT_PRIO + 40) |
35 | |
36 | -#define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) |
37 | +#define rt_task(p) (unlikely(SCHED_RT((p)->policy))) |
38 | +#define iso_task(p) ((p)->policy == SCHED_ISO) |
39 | |
40 | /* |
41 | * Some day this will be a full-fledged user tracking system.. |
42 | @@ -956,6 +958,7 @@ static inline void put_task_struct(struc |
43 | #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ |
44 | #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ |
45 | #define PF_NONSLEEP 0x02000000 /* Waiting on in kernel activity */ |
46 | +#define PF_ISOREF 0x04000000 /* SCHED_ISO task has used up quota */ |
47 | |
48 | /* |
49 | * Only the _current_ task can read/write to tsk->flags, but other |
50 | Index: linux-2.6.16-ck1/include/linux/sysctl.h |
51 | =================================================================== |
52 | --- linux-2.6.16-ck1.orig/include/linux/sysctl.h 2006-03-20 20:46:48.000000000 +1100 |
53 | +++ linux-2.6.16-ck1/include/linux/sysctl.h 2006-03-20 20:46:50.000000000 +1100 |
54 | @@ -150,6 +150,7 @@ enum |
55 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ |
56 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ |
57 | KERN_COMPUTE=74, /* adjust timeslices for a compute server */ |
58 | + KERN_ISO_CPU=75, /* percent cpu SCHED_ISO tasks run SCHED_RR */ |
59 | }; |
60 | |
61 | |
62 | Index: linux-2.6.16-ck1/kernel/sched.c |
63 | =================================================================== |
64 | --- linux-2.6.16-ck1.orig/kernel/sched.c 2006-03-20 20:46:49.000000000 +1100 |
65 | +++ linux-2.6.16-ck1/kernel/sched.c 2006-03-20 20:46:50.000000000 +1100 |
66 | @@ -92,6 +92,9 @@ int sched_compute __read_mostly = 0; |
67 | #define RR_INTERVAL() (_RR_INTERVAL * (1 + 16 * sched_compute)) |
68 | #define DEF_TIMESLICE (RR_INTERVAL() * 19) |
69 | |
70 | +int sched_iso_cpu __read_mostly = 80; |
71 | +#define ISO_PERIOD (5 * HZ) |
72 | + |
73 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ |
74 | < (long long) (sd)->cache_hot_time) |
75 | |
76 | @@ -129,6 +132,9 @@ struct runqueue { |
77 | */ |
78 | unsigned long nr_uninterruptible; |
79 | |
80 | + unsigned long iso_ticks; |
81 | + unsigned int iso_refractory; |
82 | + |
83 | unsigned long long timestamp_last_tick; |
84 | unsigned int cache_ticks, preempted; |
85 | task_t *curr, *idle; |
86 | @@ -713,6 +719,17 @@ static int effective_prio(const task_t * |
87 | if (rt_task(p)) |
88 | return p->prio; |
89 | |
90 | + if (iso_task(p)) { |
91 | + if (likely(!(p->flags & PF_ISOREF))) |
92 | + /* |
93 | + * If SCHED_ISO tasks have not used up their real time |
94 | + * quota they run just better than the highest |
95 | + * SCHED_NORMAL priority. Otherwise they run as |
96 | + * SCHED_NORMAL. |
97 | + */ |
98 | + return MAX_RT_PRIO - 1; |
99 | + } |
100 | + |
101 | full_slice = slice(p); |
102 | if (full_slice > p->slice) |
103 | used_slice = full_slice - p->slice; |
104 | @@ -2522,6 +2539,22 @@ static void time_slice_expired(task_t *p |
105 | } |
106 | |
107 | /* |
108 | + * Test if SCHED_ISO tasks have run longer than their allotted period as RT |
109 | + * tasks and set the refractory flag if necessary. There is 10% hysteresis |
110 | + * for unsetting the flag. |
111 | + */ |
112 | +static inline unsigned int test_ret_isorefractory(runqueue_t *rq) |
113 | +{ |
114 | + if (likely(!rq->iso_refractory)) { |
115 | + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu) |
116 | + rq->iso_refractory = 1; |
117 | + } else |
118 | + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100)) |
119 | + rq->iso_refractory = 0; |
120 | + return rq->iso_refractory; |
121 | +} |
122 | + |
123 | +/* |
124 | * This function gets called by the timer code, with HZ frequency. |
125 | * We call it with interrupts disabled. |
126 | */ |
127 | @@ -2549,15 +2582,33 @@ void scheduler_tick(void) |
128 | set_tsk_need_resched(p); |
129 | goto out; |
130 | } |
131 | + |
132 | + spin_lock(&rq->lock); |
133 | + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) && |
134 | + p->mm)) { |
135 | + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100) |
136 | + rq->iso_ticks += 100; |
137 | + } else |
138 | + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; |
139 | + |
140 | + if (iso_task(p)) { |
141 | + if (unlikely(test_ret_isorefractory(rq))) { |
142 | + if (!(p->flags & PF_ISOREF)) { |
143 | + set_tsk_need_resched(p); |
144 | + p->flags |= PF_ISOREF; |
145 | + } |
146 | + } else |
147 | + p->flags &= ~PF_ISOREF; |
148 | + } |
149 | + |
150 | /* |
151 | * SCHED_FIFO tasks never run out of timeslice. |
152 | */ |
153 | if (unlikely(p->policy == SCHED_FIFO)) { |
154 | expired_balance = 0; |
155 | - goto out; |
156 | + goto out_unlock; |
157 | } |
158 | |
159 | - spin_lock(&rq->lock); |
160 | debit = ns_diff(rq->timestamp_last_tick, p->timestamp); |
161 | p->ns_debit += debit; |
162 | if (p->ns_debit < NSJIFFY) |
163 | @@ -2696,7 +2747,7 @@ static int dependent_sleeper(const int t |
164 | task_t *smt_curr = smt_rq->curr; |
165 | |
166 | /* Kernel threads do not participate in dependent sleeping */ |
167 | - if (!p->mm || !smt_curr->mm || rt_task(p)) |
168 | + if (!p->mm || !smt_curr->mm || rt_task(p) || iso_task(p)) |
169 | goto check_smt_task; |
170 | |
171 | /* |
172 | @@ -2723,7 +2774,7 @@ static int dependent_sleeper(const int t |
173 | |
174 | check_smt_task: |
175 | if ((!smt_curr->mm && smt_curr != smt_rq->idle) || |
176 | - rt_task(smt_curr)) |
177 | + rt_task(smt_curr) || iso_task(smt_curr)) |
178 | continue; |
179 | if (!p->mm) { |
180 | wakeup_busy_runqueue(smt_rq); |
181 | @@ -3525,9 +3576,19 @@ int sched_setscheduler(struct task_struc |
182 | { |
183 | int retval; |
184 | int queued, oldprio, oldpolicy = -1; |
185 | + struct sched_param zero_param = { .sched_priority = 0 }; |
186 | unsigned long flags; |
187 | runqueue_t *rq; |
188 | |
189 | + if (SCHED_RT(policy) && !capable(CAP_SYS_NICE)) { |
190 | + /* |
191 | + * If the caller requested an RT policy without having the |
192 | + * necessary rights, we downgrade the policy to SCHED_ISO. |
193 | + * We also set the parameter to zero to pass the checks. |
194 | + */ |
195 | + policy = SCHED_ISO; |
196 | + param = &zero_param; |
197 | + } |
198 | recheck: |
199 | /* double check policy once rq lock held */ |
200 | if (policy < 0) |
201 | @@ -4029,6 +4090,7 @@ asmlinkage long sys_sched_get_priority_m |
202 | break; |
203 | case SCHED_NORMAL: |
204 | case SCHED_BATCH: |
205 | + case SCHED_ISO: |
206 | ret = 0; |
207 | break; |
208 | } |
209 | @@ -4053,6 +4115,7 @@ asmlinkage long sys_sched_get_priority_m |
210 | break; |
211 | case SCHED_NORMAL: |
212 | case SCHED_BATCH: |
213 | + case SCHED_ISO: |
214 | ret = 0; |
215 | } |
216 | return ret; |
217 | @@ -5903,6 +5966,7 @@ void __init sched_init(void) |
218 | rq->nr_running = 0; |
219 | rq->cache_ticks = 0; |
220 | rq->preempted = 0; |
221 | + rq->iso_ticks = 0; |
222 | |
223 | #ifdef CONFIG_SMP |
224 | rq->sd = NULL; |
225 | Index: linux-2.6.16-ck1/kernel/sysctl.c |
226 | =================================================================== |
227 | --- linux-2.6.16-ck1.orig/kernel/sysctl.c 2006-03-20 20:46:48.000000000 +1100 |
228 | +++ linux-2.6.16-ck1/kernel/sysctl.c 2006-03-20 20:46:50.000000000 +1100 |
229 | @@ -229,6 +229,11 @@ static ctl_table root_table[] = { |
230 | { .ctl_name = 0 } |
231 | }; |
232 | |
233 | +/* Constants for minimum and maximum testing. |
234 | + We use these as one-element integer vectors. */ |
235 | +static int zero; |
236 | +static int one_hundred = 100; |
237 | + |
238 | static ctl_table kern_table[] = { |
239 | { |
240 | .ctl_name = KERN_OSTYPE, |
241 | @@ -639,6 +644,17 @@ static ctl_table kern_table[] = { |
242 | .mode = 0644, |
243 | .proc_handler = &proc_dointvec, |
244 | }, |
245 | + { |
246 | + .ctl_name = KERN_ISO_CPU, |
247 | + .procname = "iso_cpu", |
248 | + .data = &sched_iso_cpu, |
249 | + .maxlen = sizeof (int), |
250 | + .mode = 0644, |
251 | + .proc_handler = &proc_dointvec_minmax, |
252 | + .strategy = &sysctl_intvec, |
253 | + .extra1 = &zero, |
254 | + .extra2 = &one_hundred, |
255 | + }, |
256 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
257 | { |
258 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, |
259 | @@ -702,12 +718,6 @@ static ctl_table kern_table[] = { |
260 | { .ctl_name = 0 } |
261 | }; |
262 | |
263 | -/* Constants for minimum and maximum testing in vm_table. |
264 | - We use these as one-element integer vectors. */ |
265 | -static int zero; |
266 | -static int one_hundred = 100; |
267 | - |
268 | - |
269 | static ctl_table vm_table[] = { |
270 | { |
271 | .ctl_name = VM_OVERCOMMIT_MEMORY, |