Magellan Linux

Contents of /trunk/kernel26-magellan/patches-2.6.16-r10/0012-2.6.16-sched-staircase14.2_15.patch



Revision 70
Thu May 11 19:09:22 2006 UTC by niro
File size: 10915 byte(s)
Log message: import

---
include/linux/sched.h | 2
kernel/sched.c | 141 +++++++++++++++++++++++++-------------------------
2 files changed, 72 insertions(+), 71 deletions(-)

Index: linux-2.6.16-ck3/kernel/sched.c
===================================================================
--- linux-2.6.16-ck3.orig/kernel/sched.c 2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/kernel/sched.c 2006-04-02 12:46:34.000000000 +1000
@@ -16,9 +16,9 @@
* by Davide Libenzi, preemptible kernel bits by Robert Love.
* 2003-09-03 Interactivity tuning by Con Kolivas.
* 2004-04-02 Scheduler domains code by Nick Piggin
- * 2006-03-16 New staircase scheduling policy by Con Kolivas with help
+ * 2006-04-02 Staircase scheduling policy by Con Kolivas with help
* from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
- * Staircase v14.2
+ * Staircase v15
*/

#include <linux/mm.h>
@@ -64,6 +64,7 @@
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
+#define MIN_USER_PRIO (MAX_PRIO - 2)

/*
* 'User priority' is the nice value converted to something we
@@ -77,9 +78,9 @@
/*
* Some helpers for converting nanosecond timing to jiffy resolution
*/
-#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
-#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
#define NSJIFFY (1000000000 / HZ) /* One jiffy in ns */
+#define NS_TO_JIFFIES(TIME) ((TIME) / NSJIFFY)
+#define JIFFIES_TO_NS(TIME) ((TIME) * NSJIFFY)
#define TASK_PREEMPTS_CURR(p, rq) ((p)->prio < (rq)->curr->prio)

int sched_compute __read_mostly = 0;
@@ -89,7 +90,7 @@ int sched_compute __read_mostly = 0;
* and has twenty times larger intervals. Set to a minimum of 6ms.
*/
#define _RR_INTERVAL ((6 * HZ / 1001) + 1)
-#define RR_INTERVAL() (_RR_INTERVAL * (1 + 16 * sched_compute))
+#define RR_INTERVAL() (_RR_INTERVAL * (1 + 9 * sched_compute))
#define DEF_TIMESLICE (RR_INTERVAL() * 19)

int sched_iso_cpu __read_mostly = 80;
@@ -133,10 +134,10 @@ struct runqueue {
unsigned long nr_uninterruptible;

unsigned long iso_ticks;
- unsigned int iso_refractory;
+ unsigned short iso_refractory;

unsigned long long timestamp_last_tick;
- unsigned int cache_ticks, preempted;
+ unsigned short cache_ticks, preempted;
task_t *curr, *idle;
struct mm_struct *prev_mm;
unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
@@ -507,7 +508,7 @@ static unsigned long ns_diff(const unsig
const unsigned long long v2)
{
unsigned long long vdiff;
- if (likely(v1 > v2)) {
+ if (likely(v1 >= v2)) {
vdiff = v1 - v2;
#if BITS_PER_LONG < 64
if (vdiff > (1 << 31))
@@ -549,9 +550,16 @@ static void fastcall enqueue_task(task_t
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
*/
-static inline void requeue_task(task_t *p, runqueue_t *rq)
+static void fastcall requeue_task(task_t *p, runqueue_t *rq, const int prio)
{
- list_move_tail(&p->run_list, rq->queue + p->prio);
+ list_move_tail(&p->run_list, rq->queue + prio);
+ if (p->prio != prio) {
+ if (list_empty(rq->queue + p->prio))
+ __clear_bit(p->prio, rq->bitmap);
+ p->prio = prio;
+ __set_bit(prio, rq->bitmap);
+ }
+ p->ns_debit = 0;
}

static inline void enqueue_task_head(task_t *p, runqueue_t *rq)
@@ -626,7 +634,7 @@ static inline void dec_nr_running(const
/*
* __activate_task - move a task to the runqueue.
*/
-static void fastcall __activate_task(task_t *p, runqueue_t *rq)
+static inline void __activate_task(task_t *p, runqueue_t *rq)
{
enqueue_task(p, rq);
inc_nr_running(p, rq);
@@ -680,20 +688,18 @@ static unsigned int fastcall slice(const
static void fastcall inc_bonus(task_t *p, const unsigned long totalrun,
const unsigned long sleep)
{
- unsigned int best_bonus;
+ unsigned int best_bonus = sleep / (totalrun + 1);

- best_bonus = sleep / (totalrun + 1);
if (p->bonus >= best_bonus)
return;
-
- p->bonus++;
best_bonus = bonus(p);
- if (p->bonus > best_bonus)
- p->bonus = best_bonus;
+ if (p->bonus < best_bonus)
+ p->bonus++;
}

-static void dec_bonus(task_t *p)
+static inline void dec_bonus(task_t *p)
{
+ p->totalrun = 0;
if (p->bonus)
p->bonus--;
}
@@ -739,7 +745,7 @@ static int effective_prio(task_t *p)
*/
p->time_slice = p->slice % RR_INTERVAL() ? :
RR_INTERVAL();
- return MAX_PRIO - 2;
+ return MIN_USER_PRIO;
}
return MAX_PRIO - 1;
}
@@ -755,8 +761,8 @@ static int effective_prio(task_t *p)

rr = rr_interval(p);
prio += used_slice / rr;
- if (prio > MAX_PRIO - 2)
- prio = MAX_PRIO - 2;
+ if (prio > MIN_USER_PRIO)
+ prio = MIN_USER_PRIO;
return prio;
}

@@ -764,13 +770,14 @@ static inline void continue_slice(task_t
{
unsigned long total_run = NS_TO_JIFFIES(p->totalrun);

- if (total_run >= p->slice) {
- p->totalrun -= JIFFIES_TO_NS(p->slice);
+ if (total_run >= p->slice || p->prio == MIN_USER_PRIO)
dec_bonus(p);
- } else {
- unsigned int remainder;
+ else {
+ unsigned long remainder;

p->slice -= total_run;
+ if (p->slice <= p->time_slice)
+ dec_bonus(p);
remainder = p->slice % rr_interval(p);
if (remainder)
p->time_slice = remainder;
@@ -784,34 +791,35 @@ static inline void continue_slice(task_t
*/
static inline void recalc_task_prio(task_t *p, const unsigned long long now)
{
+ /* Double the systime to account for missed sub-jiffy time */
+ unsigned long ns_systime = JIFFIES_TO_NS(p->systime) * 2;
unsigned long sleep_time = ns_diff(now, p->timestamp);

/*
- * Add the total for this last scheduled run (p->runtime) to the
- * running total so far used (p->totalrun).
- */
- p->totalrun += p->runtime;
+ * Add the total for this last scheduled run (p->runtime) and system
+ * time (p->systime) done on behalf of p to the running total so far
+ * used (p->totalrun).
+ */
+ p->totalrun += p->runtime + ns_systime;
+
+ /* systime is unintentionally seen as sleep, subtract it */
+ if (likely(ns_systime < sleep_time))
+ sleep_time -= ns_systime;
+ else
+ sleep_time = 0;

/*
* If we sleep longer than our running total and have not set the
* PF_NONSLEEP flag we gain a bonus.
*/
- if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP) &&
- !sched_compute) {
- inc_bonus(p, p->totalrun, sleep_time);
- p->totalrun = 0;
- return;
+ if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP)) {
+ inc_bonus(p, p->totalrun, sleep_time);
+ p->totalrun = 0;
+ return;
}

- /*
- * If we have not set the PF_NONSLEEP flag we elevate priority by the
- * amount of time we slept.
- */
- if (p->flags & PF_NONSLEEP)
- p->flags &= ~PF_NONSLEEP;
- else
- p->totalrun -= sleep_time;
-
+ /* We elevate priority by the amount of time we slept. */
+ p->totalrun -= sleep_time;
continue_slice(p);
}

@@ -839,6 +847,7 @@ static void activate_task(task_t *p, run
if (!rt_task(p)) {
recalc_task_prio(p, now);
p->flags &= ~PF_NONSLEEP;
+ p->systime = 0;
p->prio = effective_prio(p);
}
p->timestamp = now;
@@ -1220,11 +1229,15 @@ static inline int wake_idle(const int cp
*/
static void fastcall preempt(const task_t *p, runqueue_t *rq)
{
- if (p->prio >= rq->curr->prio)
+ task_t *curr = rq->curr;
+
+ if (p->prio >= curr->prio)
return;
- if (!sched_compute || rq->cache_ticks >= CACHE_DELAY ||
- !p->mm || rt_task(p))
- resched_task(rq->curr);
+ if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
+ rt_task(p) || curr == rq->idle) {
+ resched_task(curr);
+ return;
+ }
rq->preempted = 1;
}

@@ -1448,21 +1461,20 @@ void fastcall wake_up_new_task(task_t *p
this_cpu = smp_processor_id();
cpu = task_cpu(p);

- /*
- * Forked process gets no bonus to prevent fork bombs.
- */
+ /* Forked process gets no bonus to prevent fork bombs. */
p->bonus = 0;
+ current->flags |= PF_NONSLEEP;

if (likely(cpu == this_cpu)) {
- current->flags |= PF_NONSLEEP;
activate_task(p, rq, 1);
- if (!(clone_flags & CLONE_VM))
+ if (!(clone_flags & CLONE_VM)) {
/*
* The VM isn't cloned, so we're in a good position to
* do child-runs-first in anticipation of an exec. This
* usually avoids a lot of COW overhead.
*/
set_need_resched();
+ }
/*
* We skip the following code due to cpu == this_cpu
*
@@ -1488,7 +1500,6 @@ void fastcall wake_up_new_task(task_t *p
*/
task_rq_unlock(rq, &flags);
this_rq = task_rq_lock(current, &flags);
- current->flags |= PF_NONSLEEP;
}
task_rq_unlock(this_rq, &flags);
}
@@ -2518,6 +2529,7 @@ void account_system_time(struct task_str
else
cpustat->idle = cputime64_add(cpustat->idle, tmp);

+ p->systime++;
/* Account for system time used */
acct_update_integrals(p);
}
@@ -2546,10 +2558,8 @@ void account_steal_time(struct task_stru
static void time_slice_expired(task_t *p, runqueue_t *rq)
{
set_tsk_need_resched(p);
- dequeue_task(p, rq);
- p->prio = effective_prio(p);
p->time_slice = rr_interval(p);
- enqueue_task(p, rq);
+ requeue_task(p, rq, effective_prio(p));
}

/*
@@ -2635,7 +2645,6 @@ void scheduler_tick(void)
dec_bonus(p);
p->slice = slice(p);
time_slice_expired(p, rq);
- p->totalrun = 0;
goto out_unlock;
}
/*
@@ -2994,8 +3003,7 @@ switch_tasks:

sched_info_switch(prev, next);
if (likely(prev != next)) {
- rq->preempted = 0;
- rq->cache_ticks = 0;
+ rq->preempted = rq->cache_ticks = 0;
next->timestamp = now;
rq->nr_switches++;
rq->curr = next;
@@ -3969,14 +3977,9 @@ asmlinkage long sys_sched_yield(void)
current->slice = slice(current);
current->time_slice = rr_interval(current);
if (likely(!rt_task(current) && !idleprio_task(current)))
- newprio = MAX_PRIO - 2;
+ newprio = MIN_USER_PRIO;

- if (newprio != current->prio) {
- dequeue_task(current, rq);
- current->prio = newprio;
- enqueue_task(current, rq);
- } else
- requeue_task(current, rq);
+ requeue_task(current, rq, newprio);

/*
* Since we are going to call schedule() anyway, there's
@@ -6002,10 +6005,8 @@ void __init sched_init(void)

rq = cpu_rq(i);
spin_lock_init(&rq->lock);
- rq->nr_running = 0;
- rq->cache_ticks = 0;
- rq->preempted = 0;
- rq->iso_ticks = 0;
+ rq->nr_running = rq->cache_ticks = rq->preempted =
+ rq->iso_ticks = 0;

#ifdef CONFIG_SMP
rq->sd = NULL;
Index: linux-2.6.16-ck3/include/linux/sched.h
===================================================================
--- linux-2.6.16-ck3.orig/include/linux/sched.h 2006-04-02 11:46:55.000000000 +1000
+++ linux-2.6.16-ck3/include/linux/sched.h 2006-04-02 11:47:51.000000000 +1000
@@ -739,7 +739,7 @@ struct task_struct {
unsigned short ioprio;

unsigned long long timestamp;
- unsigned long runtime, totalrun, ns_debit;
+ unsigned long runtime, totalrun, ns_debit, systime;
unsigned int bonus;
unsigned int slice, time_slice;
unsigned long long sched_time; /* sched_clock time spent running */