Magellan Linux

Contents of /trunk/kernel26-alx/patches-2.6.20-r6/0003-2.6.20-sched-staircase17_compute_tunable.patch

Parent Directory | Revision Log


Revision 1175 - (show annotations) (download)
Thu Oct 14 12:15:46 2010 UTC (13 years, 6 months ago) by niro
File size: 6142 byte(s)
-2.6.20-alx-r6 new magellan 0.5.2 kernel
1 Add the compute tunable for the staircase cpu scheduler. This modifies the
2 cpu scheduler behaviour to use significantly longer cpu timeslices and to
3 delay normal preemption, minimising the cpu cache harming effects of multiple
4 concurrently running tasks. This increases cpu throughput at the cost of
5 significantly increased latencies.
6
7 Signed-off-by: Con Kolivas <kernel@kolivas.org>
8
9 Documentation/sysctl/kernel.txt | 11 ++++++++++
10 include/linux/sched.h | 2 -
11 kernel/sched.c | 41 +++++++++++++++++++++++++++++++---------
12 kernel/sysctl.c | 8 +++++++
13 4 files changed, 52 insertions(+), 10 deletions(-)
14
15 Index: linux-2.6.20-ck1/include/linux/sched.h
16 ===================================================================
17 --- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
18 +++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
19 @@ -216,7 +216,7 @@ extern void show_stack(struct task_struc
20
21 void io_schedule(void);
22 long io_schedule_timeout(long timeout);
23 -extern int sched_interactive;
24 +extern int sched_interactive, sched_compute;
25
26 extern void cpu_init (void);
27 extern void trap_init(void);
28 Index: linux-2.6.20-ck1/kernel/sched.c
29 ===================================================================
30 --- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
31 +++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
32 @@ -63,8 +63,17 @@
33 /*
34 * sched_interactive - sysctl which allows interactive tasks to have bonus
35 * raise its priority.
36 + * sched_compute - sysctl which enables long timeslices and delayed preemption
37 + * for compute server usage.
38 */
39 int sched_interactive __read_mostly = 1;
40 +int sched_compute __read_mostly;
41 +
42 +/*
43 + * CACHE_DELAY is how long, in ticks, preemption is delayed in sched_compute
44 + * mode; it is set to a nominal 10ms.
45 + */
46 +#define CACHE_DELAY (10 * (HZ) / 1001 + 1)
47
48 /*
49 * Convert user-nice values [ -20 ... 0 ... 19 ]
50 @@ -96,9 +105,10 @@ int sched_interactive __read_mostly = 1;
51
52 /*
53 * This is the time all tasks within the same priority round robin.
54 - * Set to a minimum of 6ms.
55 + * Set to a minimum of 6ms. It is 10 times longer in compute mode.
56 */
57 -#define RR_INTERVAL ((6 * HZ / 1001) + 1)
58 +#define _RR_INTERVAL ((6 * HZ / 1001) + 1)
59 +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute))
60 #define DEF_TIMESLICE (RR_INTERVAL * 19)
61
62 /*
63 @@ -132,6 +142,7 @@ struct rq {
64
65 /* Cached timestamp set by update_cpu_clock() */
66 unsigned long long most_recent_timestamp;
67 + unsigned short cache_ticks, preempted;
68 struct task_struct *curr, *idle;
69 unsigned long next_balance;
70 struct mm_struct *prev_mm;
71 @@ -873,7 +884,7 @@ static inline int __normal_prio(struct t
72
73 best_bonus = bonus(p);
74 prio = MAX_RT_PRIO + best_bonus;
75 - if (sched_interactive && !batch_task(p))
76 + if (sched_interactive && !sched_compute && !batch_task(p))
77 prio -= p->bonus;
78
79 rr = rr_interval(p);
80 @@ -1347,14 +1358,22 @@ static inline int wake_idle(int cpu, str
81 #endif
82
83 /*
84 - * Check to see if p preempts rq->curr and resched if it does.
85 + * Check to see if p preempts rq->curr and resched if it does. In compute
86 + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted.
87 */
88 -static inline void preempt(const struct task_struct *p, struct rq *rq)
89 +static void fastcall preempt(const struct task_struct *p, struct rq *rq)
90 {
91 - if (TASK_PREEMPTS_CURR(p, rq))
92 - resched_task(rq->curr);
93 -}
94 + struct task_struct *curr = rq->curr;
95
96 + if (p->prio >= curr->prio)
97 + return;
98 + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
99 + rt_task(p) || curr == rq->idle) {
100 + resched_task(curr);
101 + return;
102 + }
103 + rq->preempted = 1;
104 +}
105
106 /***
107 * try_to_wake_up - wake up a thread
108 @@ -3008,6 +3027,9 @@ static void task_running_tick(struct rq
109 time_slice_expired(p, rq);
110 goto out_unlock;
111 }
112 + rq->cache_ticks++;
113 + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY)
114 + set_tsk_need_resched(p);
115 out_unlock:
116 spin_unlock(&rq->lock);
117 }
118 @@ -3304,6 +3326,7 @@ switch_tasks:
119
120 sched_info_switch(prev, next);
121 if (likely(prev != next)) {
122 + rq->preempted = rq->cache_ticks = 0;
123 next->timestamp = now;
124 rq->nr_switches++;
125 rq->curr = next;
126 @@ -6625,7 +6648,7 @@ void __init sched_init(void)
127 rq = cpu_rq(i);
128 spin_lock_init(&rq->lock);
129 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
130 - rq->nr_running = 0;
131 + rq->nr_running = rq->cache_ticks = rq->preempted = 0;
132
133 #ifdef CONFIG_SMP
134 rq->sd = NULL;
135 Index: linux-2.6.20-ck1/kernel/sysctl.c
136 ===================================================================
137 --- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
138 +++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
139 @@ -684,6 +684,14 @@ static ctl_table kern_table[] = {
140 .mode = 0644,
141 .proc_handler = &proc_dointvec,
142 },
143 + {
144 + .ctl_name = CTL_UNNUMBERED,
145 + .procname = "compute",
146 + .data = &sched_compute,
147 + .maxlen = sizeof (int),
148 + .mode = 0644,
149 + .proc_handler = &proc_dointvec,
150 + },
151 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
152 {
153 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
154 Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt
155 ===================================================================
156 --- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
157 +++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
158 @@ -18,6 +18,7 @@ Currently, these files might (depending
159 show up in /proc/sys/kernel:
160 - acpi_video_flags
161 - acct
162 +- compute
163 - core_pattern
164 - core_uses_pid
165 - ctrl-alt-del
166 @@ -85,6 +86,16 @@ valid for 30 seconds.
167
168 ==============================================================
169
170 +compute:
171 +
172 +This flag controls the long timeslice, delayed preemption mode in the
173 +cpu scheduler, which is suitable for scientific computation applications.
174 +It leads to large latencies, so it is unsuitable for normal usage.
175 +
176 +Disabled by default.
177 +
178 +==============================================================
179 +
180 core_pattern:
181
182 core_pattern is used to specify a core dumpfile pattern name.