Magellan Linux

Annotation of /trunk/kernel26-alx/patches-2.6.20-r6/0003-2.6.20-sched-staircase17_compute_tunable.patch

Parent Directory | Revision Log


Revision 1175 - (hide annotations) (download)
Thu Oct 14 12:15:46 2010 UTC (13 years, 7 months ago) by niro
File size: 6142 byte(s)
-2.6.20-alx-r6 new magellan 0.5.2 kernel
1 niro 1175 Add the compute tunable for the staircase cpu scheduler. When enabled, the
2     cpu scheduler uses significantly longer cpu timeslices and delays normal
3     preemption to minimise the cache-thrashing effects of multiple concurrently
4     running tasks. This increases cpu throughput at the cost of significantly
5     increased latencies.
6    
7     Signed-off-by: Con Kolivas <kernel@kolivas.org>
8    
9     Documentation/sysctl/kernel.txt | 11 ++++++++++
10     include/linux/sched.h | 2 -
11     kernel/sched.c | 41 +++++++++++++++++++++++++++++++---------
12     kernel/sysctl.c | 8 +++++++
13     4 files changed, 52 insertions(+), 10 deletions(-)
14    
15     Index: linux-2.6.20-ck1/include/linux/sched.h
16     ===================================================================
17     --- linux-2.6.20-ck1.orig/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
18     +++ linux-2.6.20-ck1/include/linux/sched.h 2007-02-16 19:01:30.000000000 +1100
19     @@ -216,7 +216,7 @@ extern void show_stack(struct task_struc
20    
21     void io_schedule(void);
22     long io_schedule_timeout(long timeout);
23     -extern int sched_interactive;
24     +extern int sched_interactive, sched_compute;
25    
26     extern void cpu_init (void);
27     extern void trap_init(void);
28     Index: linux-2.6.20-ck1/kernel/sched.c
29     ===================================================================
30     --- linux-2.6.20-ck1.orig/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
31     +++ linux-2.6.20-ck1/kernel/sched.c 2007-02-16 19:01:30.000000000 +1100
32     @@ -63,8 +63,17 @@
33     /*
34     * sched_interactive - sysctl which allows interactive tasks to have bonus
35     * raise its priority.
36     + * sched_compute - sysctl which enables long timeslices and delayed preemption
37     + * for compute server usage.
38     */
39     int sched_interactive __read_mostly = 1;
40     +int sched_compute __read_mostly;
41     +
42     +/*
43     + * CACHE_DELAY is the time preemption is delayed in sched_compute mode
44     + * and is set to a nominal 10ms.
45     + */
46     +#define CACHE_DELAY (10 * (HZ) / 1001 + 1)
47    
48     /*
49     * Convert user-nice values [ -20 ... 0 ... 19 ]
50     @@ -96,9 +105,10 @@ int sched_interactive __read_mostly = 1;
51    
52     /*
53     * This is the time all tasks within the same priority round robin.
54     - * Set to a minimum of 6ms.
55     + * Set to a minimum of 6ms. It is 10 times longer in compute mode.
56     */
57     -#define RR_INTERVAL ((6 * HZ / 1001) + 1)
58     +#define _RR_INTERVAL ((6 * HZ / 1001) + 1)
59     +#define RR_INTERVAL (_RR_INTERVAL * (1 + 9 * sched_compute))
60     #define DEF_TIMESLICE (RR_INTERVAL * 19)
61    
62     /*
63     @@ -132,6 +142,7 @@ struct rq {
64    
65     /* Cached timestamp set by update_cpu_clock() */
66     unsigned long long most_recent_timestamp;
67     + unsigned short cache_ticks, preempted;
68     struct task_struct *curr, *idle;
69     unsigned long next_balance;
70     struct mm_struct *prev_mm;
71     @@ -873,7 +884,7 @@ static inline int __normal_prio(struct t
72    
73     best_bonus = bonus(p);
74     prio = MAX_RT_PRIO + best_bonus;
75     - if (sched_interactive && !batch_task(p))
76     + if (sched_interactive && !sched_compute && !batch_task(p))
77     prio -= p->bonus;
78    
79     rr = rr_interval(p);
80     @@ -1347,14 +1358,22 @@ static inline int wake_idle(int cpu, str
81     #endif
82    
83     /*
84     - * Check to see if p preempts rq->curr and resched if it does.
85     + * Check to see if p preempts rq->curr and resched if it does. In compute
86     + * mode we do not preempt for at least CACHE_DELAY and set rq->preempted.
87     */
88     -static inline void preempt(const struct task_struct *p, struct rq *rq)
89     +static void fastcall preempt(const struct task_struct *p, struct rq *rq)
90     {
91     - if (TASK_PREEMPTS_CURR(p, rq))
92     - resched_task(rq->curr);
93     -}
94     + struct task_struct *curr = rq->curr;
95    
96     + if (p->prio >= curr->prio)
97     + return;
98     + if (!sched_compute || rq->cache_ticks >= CACHE_DELAY || !p->mm ||
99     + rt_task(p) || curr == rq->idle) {
100     + resched_task(curr);
101     + return;
102     + }
103     + rq->preempted = 1;
104     +}
105    
106     /***
107     * try_to_wake_up - wake up a thread
108     @@ -3008,6 +3027,9 @@ static void task_running_tick(struct rq
109     time_slice_expired(p, rq);
110     goto out_unlock;
111     }
112     + rq->cache_ticks++;
113     + if (rq->preempted && rq->cache_ticks >= CACHE_DELAY)
114     + set_tsk_need_resched(p);
115     out_unlock:
116     spin_unlock(&rq->lock);
117     }
118     @@ -3304,6 +3326,7 @@ switch_tasks:
119    
120     sched_info_switch(prev, next);
121     if (likely(prev != next)) {
122     + rq->preempted = rq->cache_ticks = 0;
123     next->timestamp = now;
124     rq->nr_switches++;
125     rq->curr = next;
126     @@ -6625,7 +6648,7 @@ void __init sched_init(void)
127     rq = cpu_rq(i);
128     spin_lock_init(&rq->lock);
129     lockdep_set_class(&rq->lock, &rq->rq_lock_key);
130     - rq->nr_running = 0;
131     + rq->nr_running = rq->cache_ticks = rq->preempted = 0;
132    
133     #ifdef CONFIG_SMP
134     rq->sd = NULL;
135     Index: linux-2.6.20-ck1/kernel/sysctl.c
136     ===================================================================
137     --- linux-2.6.20-ck1.orig/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
138     +++ linux-2.6.20-ck1/kernel/sysctl.c 2007-02-16 19:01:30.000000000 +1100
139     @@ -684,6 +684,14 @@ static ctl_table kern_table[] = {
140     .mode = 0644,
141     .proc_handler = &proc_dointvec,
142     },
143     + {
144     + .ctl_name = CTL_UNNUMBERED,
145     + .procname = "compute",
146     + .data = &sched_compute,
147     + .maxlen = sizeof (int),
148     + .mode = 0644,
149     + .proc_handler = &proc_dointvec,
150     + },
151     #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
152     {
153     .ctl_name = KERN_UNKNOWN_NMI_PANIC,
154     Index: linux-2.6.20-ck1/Documentation/sysctl/kernel.txt
155     ===================================================================
156     --- linux-2.6.20-ck1.orig/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
157     +++ linux-2.6.20-ck1/Documentation/sysctl/kernel.txt 2007-02-16 19:01:30.000000000 +1100
158     @@ -18,6 +18,7 @@ Currently, these files might (depending
159     show up in /proc/sys/kernel:
160     - acpi_video_flags
161     - acct
162     +- compute
163     - core_pattern
164     - core_uses_pid
165     - ctrl-alt-del
166     @@ -85,6 +86,16 @@ valid for 30 seconds.
167    
168     ==============================================================
169    
170     +compute:
171     +
172     +This flag enables the long timeslice, delayed preemption mode of the
173     +cpu scheduler, which is intended for scientific computation workloads.
174     +It leads to large latencies, so it is unsuitable for normal usage.
175     +
176     +Disabled (0) by default; write 1 to enable.
177     +
178     +==============================================================
179     +
180     core_pattern:
181    
182     core_pattern is used to specify a core dumpfile pattern name.