Annotation of /trunk/kernel26-magellan/patches-2.6.16-r12/0012-2.6.16-sched-staircase14.2_15.patch
Revision 72 - Mon Jun 5 09:25:38 2006 UTC (18 years, 3 months ago) by niro
File size: 10915 byte(s)
ver bump to 2.6.16-r12:
- updated to linux-2.6.16.19
- updated to ck11
---
 include/linux/sched.h |    2 
 kernel/sched.c        |  141 +++++++++++++++++++++++++-------------------------
 2 files changed, 72 insertions(+), 71 deletions(-)
 
Index: linux-2.6.16-ck3/kernel/sched.c
===================================================================
--- linux-2.6.16-ck3.orig/kernel/sched.c	2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/kernel/sched.c	2006-04-02 12:46:34.000000000 +1000
@@ -16,9 +16,9 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
- *  2006-03-16	New staircase scheduling policy by Con Kolivas with help
+ *  2006-04-02	Staircase scheduling policy by Con Kolivas with help
  *		from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
- *		Staircase v14.2
+ *		Staircase v15
  */
 
 #include <linux/mm.h>
@@ -64,6 +64,7 @@
 #define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
 #define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
 #define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
+#define MIN_USER_PRIO		(MAX_PRIO - 2)
 
 /*
  * 'User priority' is the nice value converted to something we
@@ -77,9 +78,9 @@
 /*
  * Some helpers for converting nanosecond timing to jiffy resolution
  */
-#define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
-#define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
 #define NSJIFFY			(1000000000 / HZ)	/* One jiffy in ns */
+#define NS_TO_JIFFIES(TIME)	((TIME) / NSJIFFY)
+#define JIFFIES_TO_NS(TIME)	((TIME) * NSJIFFY)
 #define TASK_PREEMPTS_CURR(p, rq)	((p)->prio < (rq)->curr->prio)
 
 int sched_compute __read_mostly = 0;
@@ -89,7 +90,7 @@ int sched_compute __read_mostly = 0;
  *and has twenty times larger intervals. Set to a minimum of 6ms.
  */
 #define _RR_INTERVAL		((6 * HZ / 1001) + 1)
-#define RR_INTERVAL()		(_RR_INTERVAL * (1 + 16 * sched_compute))
+#define RR_INTERVAL()		(_RR_INTERVAL * (1 + 9 * sched_compute))
 #define DEF_TIMESLICE		(RR_INTERVAL() * 19)
 
 int sched_iso_cpu __read_mostly = 80;
@@ -133,10 +134,10 @@ struct runqueue {
 	unsigned long nr_uninterruptible;
 
 	unsigned long iso_ticks;
-	unsigned int iso_refractory;
+	unsigned short iso_refractory;
 
 	unsigned long long timestamp_last_tick;
-	unsigned int cache_ticks, preempted;
+	unsigned short cache_ticks, preempted;
 	task_t *curr, *idle;
 	struct mm_struct *prev_mm;
 	unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
@@ -507,7 +508,7 @@ static unsigned long ns_diff(const unsig
 			     const unsigned long long v2)
 {
 	unsigned long long vdiff;
-	if (likely(v1 > v2)) {
+	if (likely(v1 >= v2)) {
 		vdiff = v1 - v2;
 #if BITS_PER_LONG < 64
 		if (vdiff > (1 << 31))
@@ -549,9 +550,16 @@ static void fastcall enqueue_task(task_t
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static inline void requeue_task(task_t *p, runqueue_t *rq)
+static void fastcall requeue_task(task_t *p, runqueue_t *rq, const int prio)
 {
-	list_move_tail(&p->run_list, rq->queue + p->prio);
+	list_move_tail(&p->run_list, rq->queue + prio);
+	if (p->prio != prio) {
+		if (list_empty(rq->queue + p->prio))
+			__clear_bit(p->prio, rq->bitmap);
+		p->prio = prio;
+		__set_bit(prio, rq->bitmap);
+	}
+	p->ns_debit = 0;
 }
 
 static inline void enqueue_task_head(task_t *p, runqueue_t *rq)
@@ -626,7 +634,7 @@ static inline void dec_nr_running(const
 /*
  * __activate_task - move a task to the runqueue.
  */
-static void fastcall __activate_task(task_t *p, runqueue_t *rq)
+static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq);
 	inc_nr_running(p, rq);
@@ -680,20 +688,18 @@ static unsigned int fastcall slice(const
 static void fastcall inc_bonus(task_t *p, const unsigned long totalrun,
 			       const unsigned long sleep)
 {
-	unsigned int best_bonus;
+	unsigned int best_bonus = sleep / (totalrun + 1);
 
-	best_bonus = sleep / (totalrun + 1);
 	if (p->bonus >= best_bonus)
 		return;
-
-	p->bonus++;
 	best_bonus = bonus(p);
-	if (p->bonus > best_bonus)
-		p->bonus = best_bonus;
+	if (p->bonus < best_bonus)
+		p->bonus++;
 }
 
-static void dec_bonus(task_t *p)
+static inline void dec_bonus(task_t *p)
 {
+	p->totalrun = 0;
 	if (p->bonus)
 		p->bonus--;
 }
@@ -739,7 +745,7 @@ static int effective_prio(task_t *p)
 		 */
 		p->time_slice = p->slice % RR_INTERVAL() ? :
 			RR_INTERVAL();
-		return MAX_PRIO - 2;
+		return MIN_USER_PRIO;
 	}
 	return MAX_PRIO - 1;
 }
@@ -755,8 +761,8 @@ static int effective_prio(task_t *p)
 
 	rr = rr_interval(p);
 	prio += used_slice / rr;
-	if (prio > MAX_PRIO - 2)
-		prio = MAX_PRIO - 2;
+	if (prio > MIN_USER_PRIO)
+		prio = MIN_USER_PRIO;
 	return prio;
 }
 
@@ -764,13 +770,14 @@ static inline void continue_slice(task_t
 {
 	unsigned long total_run = NS_TO_JIFFIES(p->totalrun);
 
-	if (total_run >= p->slice) {
-		p->totalrun -= JIFFIES_TO_NS(p->slice);
+	if (total_run >= p->slice || p->prio == MIN_USER_PRIO)
 		dec_bonus(p);
-	} else {
-		unsigned int remainder;
+	else {
+		unsigned long remainder;
 
 		p->slice -= total_run;
+		if (p->slice <= p->time_slice)
+			dec_bonus(p);
 		remainder = p->slice % rr_interval(p);
 		if (remainder)
 			p->time_slice = remainder;
@@ -784,34 +791,35 @@ static inline void continue_slice(task_t
  */
 static inline void recalc_task_prio(task_t *p, const unsigned long long now)
 {
+	/* Double the systime to account for missed sub-jiffy time */
+	unsigned long ns_systime = JIFFIES_TO_NS(p->systime) * 2;
 	unsigned long sleep_time = ns_diff(now, p->timestamp);
 
 	/*
-	 * Add the total for this last scheduled run (p->runtime) to the
-	 * running total so far used (p->totalrun).
-	 */
-	p->totalrun += p->runtime;
+	 * Add the total for this last scheduled run (p->runtime) and system
+	 * time (p->systime) done on behalf of p to the running total so far
+	 * used (p->totalrun).
+	 */
+	p->totalrun += p->runtime + ns_systime;
+
+	/* systime is unintentionally seen as sleep, subtract it */
+	if (likely(ns_systime < sleep_time))
+		sleep_time -= ns_systime;
+	else
+		sleep_time = 0;
 
 	/*
 	 * If we sleep longer than our running total and have not set the
 	 * PF_NONSLEEP flag we gain a bonus.
 	 */
-	if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP) &&
-		!sched_compute) {
-			inc_bonus(p, p->totalrun, sleep_time);
-			p->totalrun = 0;
-			return;
+	if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP)) {
+		inc_bonus(p, p->totalrun, sleep_time);
+		p->totalrun = 0;
+		return;
 	}
 
-	/*
-	 * If we have not set the PF_NONSLEEP flag we elevate priority by the
-	 * amount of time we slept.
-	 */
-	if (p->flags & PF_NONSLEEP)
-		p->flags &= ~PF_NONSLEEP;
-	else
-		p->totalrun -= sleep_time;
-
+	/* We elevate priority by the amount of time we slept. */
+	p->totalrun -= sleep_time;
 	continue_slice(p);
 }
 
@@ -839,6 +847,7 @@ static void activate_task(task_t *p, run
 	if (!rt_task(p)) {
 		recalc_task_prio(p, now);
 		p->flags &= ~PF_NONSLEEP;
+		p->systime = 0;
 		p->prio = effective_prio(p);
 	}
 	p->timestamp = now;
@@ -1220,11 +1229,15 @@ static inline int wake_idle(const int cp
  */
 static void fastcall preempt(const task_t *p, runqueue_t *rq)
 {
-	if (p->prio >= rq->curr->prio)
+	task_t *curr = rq->curr;
+
+	if (p->prio >= curr->prio)
 		return;
-	if (!sched_compute || rq->cache_ticks >= CACHE_DELAY ||
-		!p->mm || rt_task(p))
-			resched_task(rq->curr);
+	if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
+	    rt_task(p) || curr == rq->idle) {
+		resched_task(curr);
+		return;
+	}
 	rq->preempted = 1;
 }
 
@@ -1448,21 +1461,20 @@ void fastcall wake_up_new_task(task_t *p
 	this_cpu = smp_processor_id();
 	cpu = task_cpu(p);
 
-	/*
-	 * Forked process gets no bonus to prevent fork bombs.
-	 */
+	/* Forked process gets no bonus to prevent fork bombs. */
 	p->bonus = 0;
+	current->flags |= PF_NONSLEEP;
 
 	if (likely(cpu == this_cpu)) {
-		current->flags |= PF_NONSLEEP;
 		activate_task(p, rq, 1);
-		if (!(clone_flags & CLONE_VM))
+		if (!(clone_flags & CLONE_VM)) {
 			/*
 			 * The VM isn't cloned, so we're in a good position to
 			 * do child-runs-first in anticipation of an exec. This
 			 * usually avoids a lot of COW overhead.
 			 */
 			set_need_resched();
+		}
 		/*
 		 * We skip the following code due to cpu == this_cpu
 		 *
@@ -1488,7 +1500,6 @@ void fastcall wake_up_new_task(task_t *p
 		 */
 		task_rq_unlock(rq, &flags);
 		this_rq = task_rq_lock(current, &flags);
-		current->flags |= PF_NONSLEEP;
 	}
 	task_rq_unlock(this_rq, &flags);
 }
@@ -2518,6 +2529,7 @@ void account_system_time(struct task_str
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
 
+	p->systime++;
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
@@ -2546,10 +2558,8 @@ void account_steal_time(struct task_stru
 static void time_slice_expired(task_t *p, runqueue_t *rq)
 {
 	set_tsk_need_resched(p);
-	dequeue_task(p, rq);
-	p->prio = effective_prio(p);
 	p->time_slice = rr_interval(p);
-	enqueue_task(p, rq);
+	requeue_task(p, rq, effective_prio(p));
 }
 
 /*
@@ -2635,7 +2645,6 @@ void scheduler_tick(void)
 		dec_bonus(p);
 		p->slice = slice(p);
 		time_slice_expired(p, rq);
-		p->totalrun = 0;
 		goto out_unlock;
 	}
 	/*
@@ -2994,8 +3003,7 @@ switch_tasks:
 
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
-		rq->preempted = 0;
-		rq->cache_ticks = 0;
+		rq->preempted = rq->cache_ticks = 0;
 		next->timestamp = now;
 		rq->nr_switches++;
 		rq->curr = next;
@@ -3969,14 +3977,9 @@ asmlinkage long sys_sched_yield(void)
 	current->slice = slice(current);
 	current->time_slice = rr_interval(current);
 	if (likely(!rt_task(current) && !idleprio_task(current)))
-		newprio = MAX_PRIO - 2;
+		newprio = MIN_USER_PRIO;
 
-	if (newprio != current->prio) {
-		dequeue_task(current, rq);
-		current->prio = newprio;
-		enqueue_task(current, rq);
-	} else
-		requeue_task(current, rq);
+	requeue_task(current, rq, newprio);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
@@ -6002,10 +6005,8 @@ void __init sched_init(void)
 
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
-		rq->nr_running = 0;
-		rq->cache_ticks = 0;
-		rq->preempted = 0;
-		rq->iso_ticks = 0;
+		rq->nr_running = rq->cache_ticks = rq->preempted =
+			rq->iso_ticks = 0;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
Index: linux-2.6.16-ck3/include/linux/sched.h
===================================================================
--- linux-2.6.16-ck3.orig/include/linux/sched.h	2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/include/linux/sched.h	2006-04-02 11:47:51.000000000 +1000
@@ -739,7 +739,7 @@ struct task_struct {
 	unsigned short ioprio;
 
 	unsigned long long timestamp;
-	unsigned long runtime, totalrun, ns_debit;
+	unsigned long runtime, totalrun, ns_debit, systime;
 	unsigned int bonus;
 	unsigned int slice, time_slice;
 	unsigned long long sched_time;	/* sched_clock time spent running */