Annotation of /trunk/kernel26-magellan/patches-2.6.16-r12/0010-2.6.16-sched-iso-4.1.patch
Parent Directory | Revision Log
Revision 72 -
(hide annotations)
(download)
Mon Jun 5 09:25:38 2006 UTC (18 years, 3 months ago) by niro
File size: 8380 byte(s)
Mon Jun 5 09:25:38 2006 UTC (18 years, 3 months ago) by niro
File size: 8380 byte(s)
ver bump to 2.6.16-r12: - updated to linux-2.6.16.19 - updated to ck11
1 | niro | 72 | include/linux/sched.h | 9 ++++-- |
2 | include/linux/sysctl.h | 1 | ||
3 | kernel/sched.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++--- | ||
4 | kernel/sysctl.c | 22 ++++++++++---- | ||
5 | 4 files changed, 91 insertions(+), 13 deletions(-) | ||
6 | |||
7 | Index: linux-2.6.16-ck1/include/linux/sched.h | ||
8 | =================================================================== | ||
9 | --- linux-2.6.16-ck1.orig/include/linux/sched.h 2006-03-20 20:46:49.000000000 +1100 | ||
10 | +++ linux-2.6.16-ck1/include/linux/sched.h 2006-03-20 20:46:50.000000000 +1100 | ||
11 | @@ -162,9 +162,10 @@ extern unsigned long weighted_cpuload(co | ||
12 | #define SCHED_FIFO 1 | ||
13 | #define SCHED_RR 2 | ||
14 | #define SCHED_BATCH 3 | ||
15 | +#define SCHED_ISO 4 | ||
16 | |||
17 | #define SCHED_MIN 0 | ||
18 | -#define SCHED_MAX 3 | ||
19 | +#define SCHED_MAX 4 | ||
20 | |||
21 | #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ | ||
22 | (policy) <= SCHED_MAX) | ||
23 | @@ -208,7 +209,7 @@ extern void show_stack(struct task_struc | ||
24 | |||
25 | void io_schedule(void); | ||
26 | long io_schedule_timeout(long timeout); | ||
27 | -extern int sched_interactive, sched_compute; | ||
28 | +extern int sched_interactive, sched_compute, sched_iso_cpu; | ||
29 | |||
30 | extern void cpu_init (void); | ||
31 | extern void trap_init(void); | ||
32 | @@ -498,7 +499,8 @@ struct signal_struct { | ||
33 | |||
34 | #define MAX_PRIO (MAX_RT_PRIO + 40) | ||
35 | |||
36 | -#define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) | ||
37 | +#define rt_task(p) (unlikely(SCHED_RT((p)->policy))) | ||
38 | +#define iso_task(p) ((p)->policy == SCHED_ISO) | ||
39 | |||
40 | /* | ||
41 | * Some day this will be a full-fledged user tracking system.. | ||
42 | @@ -956,6 +958,7 @@ static inline void put_task_struct(struc | ||
43 | #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ | ||
44 | #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ | ||
45 | #define PF_NONSLEEP 0x02000000 /* Waiting on in kernel activity */ | ||
46 | +#define PF_ISOREF 0x04000000 /* SCHED_ISO task has used up quota */ | ||
47 | |||
48 | /* | ||
49 | * Only the _current_ task can read/write to tsk->flags, but other | ||
50 | Index: linux-2.6.16-ck1/include/linux/sysctl.h | ||
51 | =================================================================== | ||
52 | --- linux-2.6.16-ck1.orig/include/linux/sysctl.h 2006-03-20 20:46:48.000000000 +1100 | ||
53 | +++ linux-2.6.16-ck1/include/linux/sysctl.h 2006-03-20 20:46:50.000000000 +1100 | ||
54 | @@ -150,6 +150,7 @@ enum | ||
55 | KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ | ||
56 | KERN_INTERACTIVE=73, /* interactive tasks can have cpu bursts */ | ||
57 | KERN_COMPUTE=74, /* adjust timeslices for a compute server */ | ||
58 | + KERN_ISO_CPU=75, /* percent of cpu SCHED_ISO tasks may run as SCHED_RR */ | ||
59 | }; | ||
60 | |||
61 | |||
62 | Index: linux-2.6.16-ck1/kernel/sched.c | ||
63 | =================================================================== | ||
64 | --- linux-2.6.16-ck1.orig/kernel/sched.c 2006-03-20 20:46:49.000000000 +1100 | ||
65 | +++ linux-2.6.16-ck1/kernel/sched.c 2006-03-20 20:46:50.000000000 +1100 | ||
66 | @@ -92,6 +92,9 @@ int sched_compute __read_mostly = 0; | ||
67 | #define RR_INTERVAL() (_RR_INTERVAL * (1 + 16 * sched_compute)) | ||
68 | #define DEF_TIMESLICE (RR_INTERVAL() * 19) | ||
69 | |||
70 | +int sched_iso_cpu __read_mostly = 80; | ||
71 | +#define ISO_PERIOD (5 * HZ) | ||
72 | + | ||
73 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ | ||
74 | < (long long) (sd)->cache_hot_time) | ||
75 | |||
76 | @@ -129,6 +132,9 @@ struct runqueue { | ||
77 | */ | ||
78 | unsigned long nr_uninterruptible; | ||
79 | |||
80 | + unsigned long iso_ticks; | ||
81 | + unsigned int iso_refractory; | ||
82 | + | ||
83 | unsigned long long timestamp_last_tick; | ||
84 | unsigned int cache_ticks, preempted; | ||
85 | task_t *curr, *idle; | ||
86 | @@ -713,6 +719,17 @@ static int effective_prio(const task_t * | ||
87 | if (rt_task(p)) | ||
88 | return p->prio; | ||
89 | |||
90 | + if (iso_task(p)) { | ||
91 | + if (likely(!(p->flags & PF_ISOREF))) | ||
92 | + /* | ||
93 | + * If SCHED_ISO tasks have not used up their real time | ||
94 | + * quota they run just better than the highest | ||
95 | + * SCHED_NORMAL priority. Otherwise they run as | ||
96 | + * SCHED_NORMAL. | ||
97 | + */ | ||
98 | + return MAX_RT_PRIO - 1; | ||
99 | + } | ||
100 | + | ||
101 | full_slice = slice(p); | ||
102 | if (full_slice > p->slice) | ||
103 | used_slice = full_slice - p->slice; | ||
104 | @@ -2522,6 +2539,22 @@ static void time_slice_expired(task_t *p | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | + * Test if SCHED_ISO tasks have run longer than their allotted period as RT | ||
109 | + * tasks and set the refractory flag if necessary. There is 10% hysteresis | ||
110 | + * for unsetting the flag. | ||
111 | + */ | ||
112 | +static inline unsigned int test_ret_isorefractory(runqueue_t *rq) | ||
113 | +{ | ||
114 | + if (likely(!rq->iso_refractory)) { | ||
115 | + if (rq->iso_ticks / ISO_PERIOD > sched_iso_cpu) | ||
116 | + rq->iso_refractory = 1; | ||
117 | + } else | ||
118 | + if (rq->iso_ticks / ISO_PERIOD < (sched_iso_cpu * 90 / 100)) | ||
119 | + rq->iso_refractory = 0; | ||
120 | + return rq->iso_refractory; | ||
121 | +} | ||
122 | + | ||
123 | +/* | ||
124 | * This function gets called by the timer code, with HZ frequency. | ||
125 | * We call it with interrupts disabled. | ||
126 | */ | ||
127 | @@ -2549,15 +2582,33 @@ void scheduler_tick(void) | ||
128 | set_tsk_need_resched(p); | ||
129 | goto out; | ||
130 | } | ||
131 | + | ||
132 | + spin_lock(&rq->lock); | ||
133 | + if (unlikely((rt_task(p) || (iso_task(p) && !rq->iso_refractory)) && | ||
134 | + p->mm)) { | ||
135 | + if (rq->iso_ticks <= (ISO_PERIOD * 100) - 100) | ||
136 | + rq->iso_ticks += 100; | ||
137 | + } else | ||
138 | + rq->iso_ticks = rq->iso_ticks * (ISO_PERIOD - 1) / ISO_PERIOD; | ||
139 | + | ||
140 | + if (iso_task(p)) { | ||
141 | + if (unlikely(test_ret_isorefractory(rq))) { | ||
142 | + if (!(p->flags & PF_ISOREF)) { | ||
143 | + set_tsk_need_resched(p); | ||
144 | + p->flags |= PF_ISOREF; | ||
145 | + } | ||
146 | + } else | ||
147 | + p->flags &= ~PF_ISOREF; | ||
148 | + } | ||
149 | + | ||
150 | /* | ||
151 | * SCHED_FIFO tasks never run out of timeslice. | ||
152 | */ | ||
153 | if (unlikely(p->policy == SCHED_FIFO)) { | ||
154 | expired_balance = 0; | ||
155 | - goto out; | ||
156 | + goto out_unlock; | ||
157 | } | ||
158 | |||
159 | - spin_lock(&rq->lock); | ||
160 | debit = ns_diff(rq->timestamp_last_tick, p->timestamp); | ||
161 | p->ns_debit += debit; | ||
162 | if (p->ns_debit < NSJIFFY) | ||
163 | @@ -2696,7 +2747,7 @@ static int dependent_sleeper(const int t | ||
164 | task_t *smt_curr = smt_rq->curr; | ||
165 | |||
166 | /* Kernel threads do not participate in dependent sleeping */ | ||
167 | - if (!p->mm || !smt_curr->mm || rt_task(p)) | ||
168 | + if (!p->mm || !smt_curr->mm || rt_task(p) || iso_task(p)) | ||
169 | goto check_smt_task; | ||
170 | |||
171 | /* | ||
172 | @@ -2723,7 +2774,7 @@ static int dependent_sleeper(const int t | ||
173 | |||
174 | check_smt_task: | ||
175 | if ((!smt_curr->mm && smt_curr != smt_rq->idle) || | ||
176 | - rt_task(smt_curr)) | ||
177 | + rt_task(smt_curr) || iso_task(smt_curr)) | ||
178 | continue; | ||
179 | if (!p->mm) { | ||
180 | wakeup_busy_runqueue(smt_rq); | ||
181 | @@ -3525,9 +3576,19 @@ int sched_setscheduler(struct task_struc | ||
182 | { | ||
183 | int retval; | ||
184 | int queued, oldprio, oldpolicy = -1; | ||
185 | + struct sched_param zero_param = { .sched_priority = 0 }; | ||
186 | unsigned long flags; | ||
187 | runqueue_t *rq; | ||
188 | |||
189 | + if (SCHED_RT(policy) && !capable(CAP_SYS_NICE)) { | ||
190 | + /* | ||
191 | + * If the caller requested an RT policy without having the | ||
192 | + * necessary rights, we downgrade the policy to SCHED_ISO. | ||
193 | + * We also set the parameter to zero to pass the checks. | ||
194 | + */ | ||
195 | + policy = SCHED_ISO; | ||
196 | + param = &zero_param; | ||
197 | + } | ||
198 | recheck: | ||
199 | /* double check policy once rq lock held */ | ||
200 | if (policy < 0) | ||
201 | @@ -4029,6 +4090,7 @@ asmlinkage long sys_sched_get_priority_m | ||
202 | break; | ||
203 | case SCHED_NORMAL: | ||
204 | case SCHED_BATCH: | ||
205 | + case SCHED_ISO: | ||
206 | ret = 0; | ||
207 | break; | ||
208 | } | ||
209 | @@ -4053,6 +4115,7 @@ asmlinkage long sys_sched_get_priority_m | ||
210 | break; | ||
211 | case SCHED_NORMAL: | ||
212 | case SCHED_BATCH: | ||
213 | + case SCHED_ISO: | ||
214 | ret = 0; | ||
215 | } | ||
216 | return ret; | ||
217 | @@ -5903,6 +5966,7 @@ void __init sched_init(void) | ||
218 | rq->nr_running = 0; | ||
219 | rq->cache_ticks = 0; | ||
220 | rq->preempted = 0; | ||
221 | + rq->iso_ticks = 0; | ||
222 | |||
223 | #ifdef CONFIG_SMP | ||
224 | rq->sd = NULL; | ||
225 | Index: linux-2.6.16-ck1/kernel/sysctl.c | ||
226 | =================================================================== | ||
227 | --- linux-2.6.16-ck1.orig/kernel/sysctl.c 2006-03-20 20:46:48.000000000 +1100 | ||
228 | +++ linux-2.6.16-ck1/kernel/sysctl.c 2006-03-20 20:46:50.000000000 +1100 | ||
229 | @@ -229,6 +229,11 @@ static ctl_table root_table[] = { | ||
230 | { .ctl_name = 0 } | ||
231 | }; | ||
232 | |||
233 | +/* Constants for minimum and maximum testing. | ||
234 | + We use these as one-element integer vectors. */ | ||
235 | +static int zero; | ||
236 | +static int one_hundred = 100; | ||
237 | + | ||
238 | static ctl_table kern_table[] = { | ||
239 | { | ||
240 | .ctl_name = KERN_OSTYPE, | ||
241 | @@ -639,6 +644,17 @@ static ctl_table kern_table[] = { | ||
242 | .mode = 0644, | ||
243 | .proc_handler = &proc_dointvec, | ||
244 | }, | ||
245 | + { | ||
246 | + .ctl_name = KERN_ISO_CPU, | ||
247 | + .procname = "iso_cpu", | ||
248 | + .data = &sched_iso_cpu, | ||
249 | + .maxlen = sizeof (int), | ||
250 | + .mode = 0644, | ||
251 | + .proc_handler = &proc_dointvec_minmax, | ||
252 | + .strategy = &sysctl_intvec, | ||
253 | + .extra1 = &zero, | ||
254 | + .extra2 = &one_hundred, | ||
255 | + }, | ||
256 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
257 | { | ||
258 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, | ||
259 | @@ -702,12 +718,6 @@ static ctl_table kern_table[] = { | ||
260 | { .ctl_name = 0 } | ||
261 | }; | ||
262 | |||
263 | -/* Constants for minimum and maximum testing in vm_table. | ||
264 | - We use these as one-element integer vectors. */ | ||
265 | -static int zero; | ||
266 | -static int one_hundred = 100; | ||
267 | - | ||
268 | - | ||
269 | static ctl_table vm_table[] = { | ||
270 | { | ||
271 | .ctl_name = VM_OVERCOMMIT_MEMORY, |