Contents of /trunk/kernel26-magellan/patches-2.6.16-r10/0012-2.6.16-sched-staircase14.2_15.patch
Revision 70
Thu May 11 19:09:22 2006 UTC (18 years, 6 months ago) by niro
File size: 10915 byte(s)
import
---
 include/linux/sched.h | 2
 kernel/sched.c | 141 +++++++++++++++++++++++++-------------------------
 2 files changed, 72 insertions(+), 71 deletions(-)

Index: linux-2.6.16-ck3/kernel/sched.c
===================================================================
--- linux-2.6.16-ck3.orig/kernel/sched.c 2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/kernel/sched.c 2006-04-02 12:46:34.000000000 +1000
@@ -16,9 +16,9 @@
 * by Davide Libenzi, preemptible kernel bits by Robert Love.
 * 2003-09-03 Interactivity tuning by Con Kolivas.
 * 2004-04-02 Scheduler domains code by Nick Piggin
- * 2006-03-16 New staircase scheduling policy by Con Kolivas with help
+ * 2006-04-02 Staircase scheduling policy by Con Kolivas with help
 * from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
- * Staircase v14.2
+ * Staircase v15
 */

 #include <linux/mm.h>
@@ -64,6 +64,7 @@
 #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
 #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
 #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
+#define MIN_USER_PRIO (MAX_PRIO - 2)

 /*
 * 'User priority' is the nice value converted to something we
@@ -77,9 +78,9 @@
 /*
 * Some helpers for converting nanosecond timing to jiffy resolution
 */
-#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
-#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
 #define NSJIFFY (1000000000 / HZ) /* One jiffy in ns */
+#define NS_TO_JIFFIES(TIME) ((TIME) / NSJIFFY)
+#define JIFFIES_TO_NS(TIME) ((TIME) * NSJIFFY)
 #define TASK_PREEMPTS_CURR(p, rq) ((p)->prio < (rq)->curr->prio)

 int sched_compute __read_mostly = 0;
@@ -89,7 +90,7 @@ int sched_compute __read_mostly = 0;
 *and has twenty times larger intervals. Set to a minimum of 6ms.
 */
 #define _RR_INTERVAL ((6 * HZ / 1001) + 1)
-#define RR_INTERVAL() (_RR_INTERVAL * (1 + 16 * sched_compute))
+#define RR_INTERVAL() (_RR_INTERVAL * (1 + 9 * sched_compute))
 #define DEF_TIMESLICE (RR_INTERVAL() * 19)

 int sched_iso_cpu __read_mostly = 80;
@@ -133,10 +134,10 @@ struct runqueue {
 unsigned long nr_uninterruptible;

 unsigned long iso_ticks;
- unsigned int iso_refractory;
+ unsigned short iso_refractory;

 unsigned long long timestamp_last_tick;
- unsigned int cache_ticks, preempted;
+ unsigned short cache_ticks, preempted;
 task_t *curr, *idle;
 struct mm_struct *prev_mm;
 unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
@@ -507,7 +508,7 @@ static unsigned long ns_diff(const unsig
 const unsigned long long v2)
 {
 unsigned long long vdiff;
- if (likely(v1 > v2)) {
+ if (likely(v1 >= v2)) {
 vdiff = v1 - v2;
 #if BITS_PER_LONG < 64
 if (vdiff > (1 << 31))
@@ -549,9 +550,16 @@ static void fastcall enqueue_task(task_t
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
-static inline void requeue_task(task_t *p, runqueue_t *rq)
+static void fastcall requeue_task(task_t *p, runqueue_t *rq, const int prio)
 {
- list_move_tail(&p->run_list, rq->queue + p->prio);
+ list_move_tail(&p->run_list, rq->queue + prio);
+ if (p->prio != prio) {
+ if (list_empty(rq->queue + p->prio))
+ __clear_bit(p->prio, rq->bitmap);
+ p->prio = prio;
+ __set_bit(prio, rq->bitmap);
+ }
+ p->ns_debit = 0;
 }

 static inline void enqueue_task_head(task_t *p, runqueue_t *rq)
@@ -626,7 +634,7 @@ static inline void dec_nr_running(const
 /*
 * __activate_task - move a task to the runqueue.
 */
-static void fastcall __activate_task(task_t *p, runqueue_t *rq)
+static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 enqueue_task(p, rq);
 inc_nr_running(p, rq);
@@ -680,20 +688,18 @@ static unsigned int fastcall slice(const
 static void fastcall inc_bonus(task_t *p, const unsigned long totalrun,
 const unsigned long sleep)
 {
- unsigned int best_bonus;
+ unsigned int best_bonus = sleep / (totalrun + 1);

- best_bonus = sleep / (totalrun + 1);
 if (p->bonus >= best_bonus)
 return;
-
- p->bonus++;
 best_bonus = bonus(p);
- if (p->bonus > best_bonus)
- p->bonus = best_bonus;
+ if (p->bonus < best_bonus)
+ p->bonus++;
 }

-static void dec_bonus(task_t *p)
+static inline void dec_bonus(task_t *p)
 {
+ p->totalrun = 0;
 if (p->bonus)
 p->bonus--;
 }
@@ -739,7 +745,7 @@ static int effective_prio(task_t *p)
 */
 p->time_slice = p->slice % RR_INTERVAL() ? :
 RR_INTERVAL();
- return MAX_PRIO - 2;
+ return MIN_USER_PRIO;
 }
 return MAX_PRIO - 1;
 }
@@ -755,8 +761,8 @@ static int effective_prio(task_t *p)

 rr = rr_interval(p);
 prio += used_slice / rr;
- if (prio > MAX_PRIO - 2)
- prio = MAX_PRIO - 2;
+ if (prio > MIN_USER_PRIO)
+ prio = MIN_USER_PRIO;
 return prio;
 }

@@ -764,13 +770,14 @@ static inline void continue_slice(task_t
 {
 unsigned long total_run = NS_TO_JIFFIES(p->totalrun);

- if (total_run >= p->slice) {
- p->totalrun -= JIFFIES_TO_NS(p->slice);
+ if (total_run >= p->slice || p->prio == MIN_USER_PRIO)
 dec_bonus(p);
- } else {
- unsigned int remainder;
+ else {
+ unsigned long remainder;

 p->slice -= total_run;
+ if (p->slice <= p->time_slice)
+ dec_bonus(p);
 remainder = p->slice % rr_interval(p);
 if (remainder)
 p->time_slice = remainder;
@@ -784,34 +791,35 @@ static inline void continue_slice(task_t
 */
 static inline void recalc_task_prio(task_t *p, const unsigned long long now)
 {
+ /* Double the systime to account for missed sub-jiffy time */
+ unsigned long ns_systime = JIFFIES_TO_NS(p->systime) * 2;
 unsigned long sleep_time = ns_diff(now, p->timestamp);

 /*
- * Add the total for this last scheduled run (p->runtime) to the
- * running total so far used (p->totalrun).
- */
- p->totalrun += p->runtime;
+ * Add the total for this last scheduled run (p->runtime) and system
+ * time (p->systime) done on behalf of p to the running total so far
+ * used (p->totalrun).
+ */
+ p->totalrun += p->runtime + ns_systime;
+
+ /* systime is unintentionally seen as sleep, subtract it */
+ if (likely(ns_systime < sleep_time))
+ sleep_time -= ns_systime;
+ else
+ sleep_time = 0;

 /*
 * If we sleep longer than our running total and have not set the
 * PF_NONSLEEP flag we gain a bonus.
 */
- if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP) &&
- !sched_compute) {
- inc_bonus(p, p->totalrun, sleep_time);
- p->totalrun = 0;
- return;
+ if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP)) {
+ inc_bonus(p, p->totalrun, sleep_time);
+ p->totalrun = 0;
+ return;
 }

- /*
- * If we have not set the PF_NONSLEEP flag we elevate priority by the
- * amount of time we slept.
- */
- if (p->flags & PF_NONSLEEP)
- p->flags &= ~PF_NONSLEEP;
- else
- p->totalrun -= sleep_time;
-
+ /* We elevate priority by the amount of time we slept. */
+ p->totalrun -= sleep_time;
 continue_slice(p);
 }

@@ -839,6 +847,7 @@ static void activate_task(task_t *p, run
 if (!rt_task(p)) {
 recalc_task_prio(p, now);
 p->flags &= ~PF_NONSLEEP;
+ p->systime = 0;
 p->prio = effective_prio(p);
 }
 p->timestamp = now;
@@ -1220,11 +1229,15 @@ static inline int wake_idle(const int cp
 */
 static void fastcall preempt(const task_t *p, runqueue_t *rq)
 {
- if (p->prio >= rq->curr->prio)
+ task_t *curr = rq->curr;
+
+ if (p->prio >= curr->prio)
 return;
- if (!sched_compute || rq->cache_ticks >= CACHE_DELAY ||
- !p->mm || rt_task(p))
- resched_task(rq->curr);
+ if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
+ rt_task(p) || curr == rq->idle) {
+ resched_task(curr);
+ return;
+ }
 rq->preempted = 1;
 }

@@ -1448,21 +1461,20 @@ void fastcall wake_up_new_task(task_t *p
 this_cpu = smp_processor_id();
 cpu = task_cpu(p);

- /*
- * Forked process gets no bonus to prevent fork bombs.
- */
+ /* Forked process gets no bonus to prevent fork bombs. */
 p->bonus = 0;
+ current->flags |= PF_NONSLEEP;

 if (likely(cpu == this_cpu)) {
- current->flags |= PF_NONSLEEP;
 activate_task(p, rq, 1);
- if (!(clone_flags & CLONE_VM))
+ if (!(clone_flags & CLONE_VM)) {
 /*
 * The VM isn't cloned, so we're in a good position to
 * do child-runs-first in anticipation of an exec. This
 * usually avoids a lot of COW overhead.
 */
 set_need_resched();
+ }
 /*
 * We skip the following code due to cpu == this_cpu
 *
@@ -1488,7 +1500,6 @@ void fastcall wake_up_new_task(task_t *p
 */
 task_rq_unlock(rq, &flags);
 this_rq = task_rq_lock(current, &flags);
- current->flags |= PF_NONSLEEP;
 }
 task_rq_unlock(this_rq, &flags);
 }
@@ -2518,6 +2529,7 @@ void account_system_time(struct task_str
 else
 cpustat->idle = cputime64_add(cpustat->idle, tmp);

+ p->systime++;
 /* Account for system time used */
 acct_update_integrals(p);
 }
@@ -2546,10 +2558,8 @@ void account_steal_time(struct task_stru
 static void time_slice_expired(task_t *p, runqueue_t *rq)
 {
 set_tsk_need_resched(p);
- dequeue_task(p, rq);
- p->prio = effective_prio(p);
 p->time_slice = rr_interval(p);
- enqueue_task(p, rq);
+ requeue_task(p, rq, effective_prio(p));
 }

 /*
@@ -2635,7 +2645,6 @@ void scheduler_tick(void)
 dec_bonus(p);
 p->slice = slice(p);
 time_slice_expired(p, rq);
- p->totalrun = 0;
 goto out_unlock;
 }
 /*
@@ -2994,8 +3003,7 @@ switch_tasks:

 sched_info_switch(prev, next);
 if (likely(prev != next)) {
- rq->preempted = 0;
- rq->cache_ticks = 0;
+ rq->preempted = rq->cache_ticks = 0;
 next->timestamp = now;
 rq->nr_switches++;
 rq->curr = next;
@@ -3969,14 +3977,9 @@ asmlinkage long sys_sched_yield(void)
 current->slice = slice(current);
 current->time_slice = rr_interval(current);
 if (likely(!rt_task(current) && !idleprio_task(current)))
- newprio = MAX_PRIO - 2;
+ newprio = MIN_USER_PRIO;

- if (newprio != current->prio) {
- dequeue_task(current, rq);
- current->prio = newprio;
- enqueue_task(current, rq);
- } else
- requeue_task(current, rq);
+ requeue_task(current, rq, newprio);

 /*
 * Since we are going to call schedule() anyway, there's
@@ -6002,10 +6005,8 @@ void __init sched_init(void)

 rq = cpu_rq(i);
 spin_lock_init(&rq->lock);
- rq->nr_running = 0;
- rq->cache_ticks = 0;
- rq->preempted = 0;
- rq->iso_ticks = 0;
+ rq->nr_running = rq->cache_ticks = rq->preempted =
+ rq->iso_ticks = 0;

 #ifdef CONFIG_SMP
 rq->sd = NULL;
Index: linux-2.6.16-ck3/include/linux/sched.h
===================================================================
--- linux-2.6.16-ck3.orig/include/linux/sched.h 2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/include/linux/sched.h 2006-04-02 11:47:51.000000000 +1000
@@ -739,7 +739,7 @@ struct task_struct {
 unsigned short ioprio;

 unsigned long long timestamp;
- unsigned long runtime, totalrun, ns_debit;
+ unsigned long runtime, totalrun, ns_debit, systime;
 unsigned int bonus;
 unsigned int slice, time_slice;
 unsigned long long sched_time; /* sched_clock time spent running */