Contents of /trunk/kernel26-alx/patches-2.6.20-r6/0003-2.6.20-sched-staircase17_compute_tunable.patch
Parent Directory | Revision Log
Revision 1175 -
(show annotations)
(download)
Thu Oct 14 12:15:46 2010 UTC (13 years, 11 months ago) by niro
File size: 6142 byte(s)
-2.6.20-alx-r6 new magellan 0.5.2 kernel
1 | Add the compute tunable for the staircase cpu scheduler. This modifies the |
2 | cpu scheduler behaviour for significantly longer cpu timeslices and delays |
3 | normal preemption to minimise the cpu cache harming effects of multiple |
4 | concurrent running tasks. This increases cpu throughput at the cost of |
5 | significantly increased latencies. |
6 | |
7 | Signed-off-by: Con Kolivas <kernel@kolivas.org> |
8 | |
9 | Documentation/sysctl/kernel.txt | 11 ++++++++++ |
10 | include/linux/sched.h | 2 - |
11 | kernel/sched.c | 41 +++++++++++++++++++++++++++++++--------- |
12 | kernel/sysctl.c | 8 +++++++ |
13 | 4 files changed, 52 insertions(+), 10 deletions(-) |
14 | |
15 | Index: linux-2.6.20-ck1/include/linux/sched.h |
16 | =================================================================== |
17 | --- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100 |
18 | +++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100 |
19 | @@ -216,7 +216,7 @@ extern void show_stack(struct task_struc |
20 | |
21 | void io_schedule(void); |
22 | long io_schedule_timeout(long timeout); |
23 | -extern int sched_interactive; |
24 | +extern int sched_interactive, sched_compute; |
25 | |
26 | extern void cpu_init (void); |
27 | extern void trap_init(void); |
28 | Index: linux-2.6.20-ck1/kernel/sched.c |
29 | =================================================================== |
30 | --- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100 |
31 | +++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100 |
32 | @@ -63,8 +63,17 @@ |
33 | /* |
34 | * sched_interactive - sysctl which allows interactive tasks to have bonus |
35 | * raise its priority. |
36 | + * sched_compute - sysctl which enables long timeslices and delayed preemption |
37 | + * for compute server usage. |
38 | */ |
39 | int sched_interactive __read_mostly = 1; |
40 | +int sched_compute __read_mostly; |
41 | + |
42 | +/* |
43 | + * CACHE_DELAY is the time preemption is delayed in sched_compute mode |
44 | + * and is set to a nominal 10ms. |
45 | + */ |
46 | +#define CACHE_DELAY (10 * (HZ) / 1001 + 1) |
47 | |
48 | /* |
49 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
50 | @@ -96,9 +105,10 @@ int sched_interactive __read_mostly = 1; |
51 | |
52 | /* |
53 | * This is the time all tasks within the same priority round robin. |
54 | - * Set to a minimum of 6ms. |
55 | + * Set to a minimum of 6ms. It is 10 times longer when sched_compute != 0. |
56 | */ |
57 | -#define RR_INTERVAL ((6 * HZ / 1001) + 1) |
58 | +#define _RR_INTERVAL ((6 * HZ / 1001) + 1) |
59 | +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * !!sched_compute)) |
60 | #define DEF_TIMESLICE (RR_INTERVAL * 19) |
61 | |
62 | /* |
63 | @@ -132,6 +142,7 @@ struct rq { |
64 | |
65 | /* Cached timestamp set by update_cpu_clock() */ |
66 | unsigned long long most_recent_timestamp; |
67 | + unsigned short cache_ticks, preempted; |
68 | struct task_struct *curr, *idle; |
69 | unsigned long next_balance; |
70 | struct mm_struct *prev_mm; |
71 | @@ -873,7 +884,7 @@ static inline int __normal_prio(struct t |
72 | |
73 | best_bonus = bonus(p); |
74 | prio = MAX_RT_PRIO + best_bonus; |
75 | - if (sched_interactive && !batch_task(p)) |
76 | + if (sched_interactive && !sched_compute && !batch_task(p)) |
77 | prio -= p->bonus; |
78 | |
79 | rr = rr_interval(p); |
80 | @@ -1347,14 +1358,22 @@ static inline int wake_idle(int cpu, str |
81 | #endif |
82 | |
83 | /* |
84 | - * Check to see if p preempts rq->curr and resched if it does. |
85 | + * Check to see if p preempts rq->curr and resched if it does. In compute |
86 | + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted. |
87 | */ |
88 | -static inline void preempt(const struct task_struct *p, struct rq *rq) |
89 | +static void fastcall preempt(const struct task_struct *p, struct rq *rq) |
90 | { |
91 | - if (TASK_PREEMPTS_CURR(p, rq)) |
92 | - resched_task(rq->curr); |
93 | -} |
94 | + struct task_struct *curr = rq->curr; |
95 | |
96 | + if (p->prio >= curr->prio) |
97 | + return; |
98 | + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm || |
99 | + rt_task(p) || curr == rq->idle) { |
100 | + resched_task(curr); |
101 | + return; |
102 | + } |
103 | + rq->preempted = 1; |
104 | +} |
105 | |
106 | /*** |
107 | * try_to_wake_up - wake up a thread |
108 | @@ -3008,6 +3027,9 @@ static void task_running_tick(struct rq |
109 | time_slice_expired(p, rq); |
110 | goto out_unlock; |
111 | } |
112 | + rq->cache_ticks++; |
113 | + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY) |
114 | + set_tsk_need_resched(p); |
115 | out_unlock: |
116 | spin_unlock(&rq->lock); |
117 | } |
118 | @@ -3304,6 +3326,7 @@ switch_tasks: |
119 | |
120 | sched_info_switch(prev, next); |
121 | if (likely(prev != next)) { |
122 | + rq->preempted = rq->cache_ticks = 0; |
123 | next->timestamp = now; |
124 | rq->nr_switches++; |
125 | rq->curr = next; |
126 | @@ -6625,7 +6648,7 @@ void __init sched_init(void) |
127 | rq = cpu_rq(i); |
128 | spin_lock_init(&rq->lock); |
129 | lockdep_set_class(&rq->lock, &rq->rq_lock_key); |
130 | - rq->nr_running = 0; |
131 | + rq->nr_running = rq->cache_ticks = rq->preempted = 0; |
132 | |
133 | #ifdef CONFIG_SMP |
134 | rq->sd = NULL; |
135 | Index: linux-2.6.20-ck1/kernel/sysctl.c |
136 | =================================================================== |
137 | --- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100 |
138 | +++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100 |
139 | @@ -684,6 +684,14 @@ static ctl_table kern_table[] = { |
140 | .mode = 0644, |
141 | .proc_handler = &proc_dointvec, |
142 | }, |
143 | + { |
144 | + .ctl_name = CTL_UNNUMBERED, |
145 | + .procname = "compute", |
146 | + .data = &sched_compute, |
147 | + .maxlen = sizeof (int), |
148 | + .mode = 0644, |
149 | + .proc_handler = &proc_dointvec, |
150 | + }, |
151 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
152 | { |
153 | .ctl_name = KERN_UNKNOWN_NMI_PANIC, |
154 | Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt |
155 | =================================================================== |
156 | --- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100 |
157 | +++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100 |
158 | @@ -18,6 +18,7 @@ Currently, these files might (depending |
159 | show up in /proc/sys/kernel: |
160 | - acpi_video_flags |
161 | - acct |
162 | +- compute |
163 | - core_pattern |
164 | - core_uses_pid |
165 | - ctrl-alt-del |
166 | @@ -85,6 +86,16 @@ valid for 30 seconds. |
167 | |
168 | ============================================================== |
169 | |
170 | +compute: |
171 | + |
172 | +This flag controls the long timeslice, delayed preemption mode in the |
173 | +cpu scheduler suitable for scientific computation applications. It |
174 | +leads to large latencies so is unsuitable for normal usage. |
175 | + |
176 | +Disabled (0) by default; write 1 to enable. |
177 | + |
178 | +============================================================== |
179 | + |
180 | core_pattern: |
181 | |
182 | core_pattern is used to specify a core dumpfile pattern name. |