Annotation of /trunk/kernel26-alx/patches-2.6.20-r5/0003-2.6.20-sched-staircase17_compute_tunable.patch
Parent Directory | Revision Log
Revision 199 -
(hide annotations)
(download)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 6142 byte(s)
Fri May 18 11:04:36 2007 UTC (17 years, 4 months ago) by niro
File size: 6142 byte(s)
-import
1 | niro | 199 | Add the compute tunable for the staircase cpu scheduler. When enabled, the |
2 | scheduler uses significantly longer cpu timeslices and delays normal | ||
3 | preemption, minimising the cache-thrashing effects of multiple concurrently | ||
4 | running tasks. This increases cpu throughput at the cost of | ||
5 | significantly increased latencies. | ||
6 | |||
7 | Signed-off-by: Con Kolivas <kernel@kolivas.org> | ||
8 | |||
9 | Documentation/sysctl/kernel.txt | 11 ++++++++++ | ||
10 | include/linux/sched.h | 2 - | ||
11 | kernel/sched.c | 41 +++++++++++++++++++++++++++++++--------- | ||
12 | kernel/sysctl.c | 8 +++++++ | ||
13 | 4 files changed, 52 insertions(+), 10 deletions(-) | ||
14 | |||
15 | Index: linux-2.6.20-ck1/include/linux/sched.h | ||
16 | =================================================================== | ||
17 | --- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100 | ||
18 | +++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100 | ||
19 | @@ -216,7 +216,7 @@ extern void show_stack(struct task_struc | ||
20 | |||
21 | void io_schedule(void); | ||
22 | long io_schedule_timeout(long timeout); | ||
23 | -extern int sched_interactive; | ||
24 | +extern int sched_interactive, sched_compute; | ||
25 | |||
26 | extern void cpu_init (void); | ||
27 | extern void trap_init(void); | ||
28 | Index: linux-2.6.20-ck1/kernel/sched.c | ||
29 | =================================================================== | ||
30 | --- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100 | ||
31 | +++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100 | ||
32 | @@ -63,8 +63,17 @@ | ||
33 | /* | ||
34 | * sched_interactive - sysctl which allows interactive tasks to have bonus | ||
35 | * raise its priority. | ||
36 | + * sched_compute - sysctl which enables long timeslices and delayed preemption | ||
37 | + * for compute server usage. | ||
38 | */ | ||
39 | int sched_interactive __read_mostly = 1; | ||
40 | +int sched_compute __read_mostly; | ||
41 | + | ||
42 | +/* | ||
43 | + * CACHE_DELAY is the time preemption is delayed in sched_compute mode | ||
44 | + * and is set to a nominal 10ms. | ||
45 | + */ | ||
46 | +#define CACHE_DELAY (10 * (HZ) / 1001 + 1) | ||
47 | |||
48 | /* | ||
49 | * Convert user-nice values [ -20 ... 0 ... 19 ] | ||
50 | @@ -96,9 +105,10 @@ int sched_interactive __read_mostly = 1; | ||
51 | |||
52 | /* | ||
53 | * This is the time all tasks within the same priority round robin. | ||
54 | - * Set to a minimum of 6ms. | ||
55 | + * Set to a minimum of 6ms. It is 10 times longer in compute mode. | ||
56 | */ | ||
57 | -#define RR_INTERVAL ((6 * HZ / 1001) + 1) | ||
58 | +#define _RR_INTERVAL ((6 * HZ / 1001) + 1) | ||
59 | +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute)) | ||
60 | #define DEF_TIMESLICE (RR_INTERVAL * 19) | ||
61 | |||
62 | /* | ||
63 | @@ -132,6 +142,7 @@ struct rq { | ||
64 | |||
65 | /* Cached timestamp set by update_cpu_clock() */ | ||
66 | unsigned long long most_recent_timestamp; | ||
67 | + unsigned short cache_ticks, preempted; | ||
68 | struct task_struct *curr, *idle; | ||
69 | unsigned long next_balance; | ||
70 | struct mm_struct *prev_mm; | ||
71 | @@ -873,7 +884,7 @@ static inline int __normal_prio(struct t | ||
72 | |||
73 | best_bonus = bonus(p); | ||
74 | prio = MAX_RT_PRIO + best_bonus; | ||
75 | - if (sched_interactive && !batch_task(p)) | ||
76 | + if (sched_interactive && !sched_compute && !batch_task(p)) | ||
77 | prio -= p->bonus; | ||
78 | |||
79 | rr = rr_interval(p); | ||
80 | @@ -1347,14 +1358,22 @@ static inline int wake_idle(int cpu, str | ||
81 | #endif | ||
82 | |||
83 | /* | ||
84 | - * Check to see if p preempts rq->curr and resched if it does. | ||
85 | + * Check to see if p preempts rq->curr and resched if it does. In compute | ||
86 | + * mode we instead set rq->preempted and defer the resched until CACHE_DELAY. | ||
87 | */ | ||
88 | -static inline void preempt(const struct task_struct *p, struct rq *rq) | ||
89 | +static void fastcall preempt(const struct task_struct *p, struct rq *rq) | ||
90 | { | ||
91 | - if (TASK_PREEMPTS_CURR(p, rq)) | ||
92 | - resched_task(rq->curr); | ||
93 | -} | ||
94 | + struct task_struct *curr = rq->curr; | ||
95 | |||
96 | + if (p->prio >= curr->prio) | ||
97 | + return; | ||
98 | + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm || | ||
99 | + rt_task(p) || curr == rq->idle) { | ||
100 | + resched_task(curr); | ||
101 | + return; | ||
102 | + } | ||
103 | + rq->preempted = 1; | ||
104 | +} | ||
105 | |||
106 | /*** | ||
107 | * try_to_wake_up - wake up a thread | ||
108 | @@ -3008,6 +3027,9 @@ static void task_running_tick(struct rq | ||
109 | time_slice_expired(p, rq); | ||
110 | goto out_unlock; | ||
111 | } | ||
112 | + rq->cache_ticks++; | ||
113 | + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY) | ||
114 | + set_tsk_need_resched(p); | ||
115 | out_unlock: | ||
116 | spin_unlock(&rq->lock); | ||
117 | } | ||
118 | @@ -3304,6 +3326,7 @@ switch_tasks: | ||
119 | |||
120 | sched_info_switch(prev, next); | ||
121 | if (likely(prev != next)) { | ||
122 | + rq->preempted = rq->cache_ticks = 0; | ||
123 | next->timestamp = now; | ||
124 | rq->nr_switches++; | ||
125 | rq->curr = next; | ||
126 | @@ -6625,7 +6648,7 @@ void __init sched_init(void) | ||
127 | rq = cpu_rq(i); | ||
128 | spin_lock_init(&rq->lock); | ||
129 | lockdep_set_class(&rq->lock, &rq->rq_lock_key); | ||
130 | - rq->nr_running = 0; | ||
131 | + rq->nr_running = rq->cache_ticks = rq->preempted = 0; | ||
132 | |||
133 | #ifdef CONFIG_SMP | ||
134 | rq->sd = NULL; | ||
135 | Index: linux-2.6.20-ck1/kernel/sysctl.c | ||
136 | =================================================================== | ||
137 | --- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100 | ||
138 | +++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100 | ||
139 | @@ -684,6 +684,14 @@ static ctl_table kern_table[] = { | ||
140 | .mode = 0644, | ||
141 | .proc_handler = &proc_dointvec, | ||
142 | }, | ||
143 | + { | ||
144 | + .ctl_name = CTL_UNNUMBERED, | ||
145 | + .procname = "compute", | ||
146 | + .data = &sched_compute, | ||
147 | + .maxlen = sizeof (int), | ||
148 | + .mode = 0644, | ||
149 | + .proc_handler = &proc_dointvec, | ||
150 | + }, | ||
151 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
152 | { | ||
153 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, | ||
154 | Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt | ||
155 | =================================================================== | ||
156 | --- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100 | ||
157 | +++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100 | ||
158 | @@ -18,6 +18,7 @@ Currently, these files might (depending | ||
159 | show up in /proc/sys/kernel: | ||
160 | - acpi_video_flags | ||
161 | - acct | ||
162 | +- compute | ||
163 | - core_pattern | ||
164 | - core_uses_pid | ||
165 | - ctrl-alt-del | ||
166 | @@ -85,6 +86,16 @@ valid for 30 seconds. | ||
167 | |||
168 | ============================================================== | ||
169 | |||
170 | +compute: | ||
171 | + | ||
172 | +This flag controls the long timeslice, delayed preemption mode in the | ||
173 | +cpu scheduler suitable for scientific computation applications. It | ||
174 | +leads to large latencies so is unsuitable for normal usage. | ||
175 | + | ||
176 | +Disabled (0) by default; write 1 to enable. | ||
177 | + | ||
178 | +============================================================== | ||
179 | + | ||
180 | core_pattern: | ||
181 | |||
182 | core_pattern is used to specify a core dumpfile pattern name. |