 include/linux/init_task.h |    4 +--
 include/linux/sched.h     |    6 +++-
 kernel/sched.c            |   57 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 54 insertions(+), 13 deletions(-)

Index: linux-2.6.16-ck1/include/linux/init_task.h
===================================================================
--- linux-2.6.16-ck1.orig/include/linux/init_task.h	2006-03-20 20:46:25.000000000 +1100
+++ linux-2.6.16-ck1/include/linux/init_task.h	2006-03-20 20:46:51.000000000 +1100
@@ -83,8 +83,8 @@ extern struct group_info init_groups;
 	.usage		= ATOMIC_INIT(2),		\
 	.flags		= 0,				\
 	.lock_depth	= -1,				\
-	.prio		= MAX_PRIO-20,			\
-	.static_prio	= MAX_PRIO-20,			\
+	.prio		= MAX_PRIO-21,			\
+	.static_prio	= MAX_PRIO-21,			\
 	.policy		= SCHED_NORMAL,			\
 	.cpus_allowed	= CPU_MASK_ALL,			\
 	.mm		= NULL,				\
Index: linux-2.6.16-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.16-ck1.orig/include/linux/sched.h	2006-03-20 20:46:50.000000000 +1100
+++ linux-2.6.16-ck1/include/linux/sched.h	2006-03-20 20:46:51.000000000 +1100
@@ -163,9 +163,10 @@ extern unsigned long weighted_cpuload(co
 #define SCHED_RR		2
 #define SCHED_BATCH		3
 #define SCHED_ISO		4
+#define SCHED_IDLEPRIO		5
 
 #define SCHED_MIN		0
-#define SCHED_MAX		4
+#define SCHED_MAX		5
 
 #define SCHED_RANGE(policy)	((policy) >= SCHED_MIN && \
 				(policy) <= SCHED_MAX)
@@ -497,10 +498,11 @@ struct signal_struct {
 #define MAX_USER_RT_PRIO	100
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define MAX_PRIO		(MAX_RT_PRIO + 41)
 
 #define rt_task(p)		(unlikely(SCHED_RT((p)->policy)))
 #define iso_task(p)		((p)->policy == SCHED_ISO)
+#define idleprio_task(p)	((p)->policy == SCHED_IDLEPRIO)
 
 /*
  * Some day this will be a full-fledged user tracking system..
Index: linux-2.6.16-ck1/kernel/sched.c
===================================================================
--- linux-2.6.16-ck1.orig/kernel/sched.c	2006-03-20 20:46:50.000000000 +1100
+++ linux-2.6.16-ck1/kernel/sched.c	2006-03-20 20:46:51.000000000 +1100
@@ -710,7 +710,7 @@ int sched_interactive __read_mostly = 1;
  * As the bonus increases the initial priority starts at a higher "stair" or
  * priority for longer.
  */
-static int effective_prio(const task_t *p)
+static int effective_prio(task_t *p)
 {
 	int prio;
 	unsigned int full_slice, used_slice = 0;
@@ -730,6 +730,20 @@ static int effective_prio(const task_t *
 		return MAX_RT_PRIO - 1;
 	}
 
+	if (idleprio_task(p)) {
+		if (unlikely(p->flags & (PF_NONSLEEP | PF_FREEZE))) {
+			/*
+			 * If idleprio is waking up from in kernel activity
+			 * or being frozen, reschedule at a normal priority
+			 * to begin with.
+			 */
+			p->time_slice = p->slice % RR_INTERVAL() ? :
+				RR_INTERVAL();
+			return MAX_PRIO - 2;
+		}
+		return MAX_PRIO - 1;
+	}
+
 	full_slice = slice(p);
 	if (full_slice > p->slice)
 		used_slice = full_slice - p->slice;
@@ -741,8 +755,8 @@ static int effective_prio(const task_t *
 
 	rr = rr_interval(p);
 	prio += used_slice / rr;
-	if (prio > MAX_PRIO - 1)
-		prio = MAX_PRIO - 1;
+	if (prio > MAX_PRIO - 2)
+		prio = MAX_PRIO - 2;
 	return prio;
 }
 
@@ -2470,7 +2484,7 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0 || idleprio_task(p))
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -2766,11 +2780,22 @@ static int dependent_sleeper(const int t
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else
+			else if (idleprio_task(p))
+				ret = 1;
+		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > slice(p))
 					ret = 1;
+			else if (idleprio_task(p) && !idleprio_task(smt_curr) &&
+				smt_curr->slice * sd->per_cpu_gain >
+				slice(smt_curr))
+				/*
+				 * With idleprio tasks they run just the last
+				 * per_cpu_gain percent of the smt task's slice.
+				 */
+					ret = 1;
+		}
 
 check_smt_task:
 		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
@@ -2790,10 +2815,15 @@ check_smt_task:
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					resched_task(smt_curr);
+			else if (idleprio_task(smt_curr))
+				resched_task(smt_curr);
 		} else {
 			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(p, sd) > slice(smt_curr))
 					resched_task(smt_curr);
+			else if (idleprio_task(smt_curr) && !idleprio_task(p) &&
+				p->slice * sd->per_cpu_gain > slice(p))
+				resched_task(smt_curr);
 			else
 				wakeup_busy_runqueue(smt_rq);
 		}
@@ -3436,8 +3466,9 @@ void set_user_nice(task_t *p, const long
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(rq, p)))
-			resched_task(rq->curr);
+		if (delta < 0 || ((delta > 0 || idleprio_task(p)) &&
+			task_running(rq, p)))
+				resched_task(rq->curr);
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -3630,6 +3661,12 @@ recheck:
 			return -EPERM;
 	}
 
+	if (!(p->mm) && policy == SCHED_IDLEPRIO)
+		/*
+		 * Don't allow kernel threads to be SCHED_IDLEPRIO.
+		 */
+		return -EINVAL;
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -3931,8 +3968,8 @@ asmlinkage long sys_sched_yield(void)
 	schedstat_inc(rq, yld_cnt);
 	current->slice = slice(current);
 	current->time_slice = rr_interval(current);
-	if (likely(!rt_task(current)))
-		newprio = MAX_PRIO - 1;
+	if (likely(!rt_task(current) && !idleprio_task(current)))
+		newprio = MAX_PRIO - 2;
 
 	if (newprio != current->prio) {
 		dequeue_task(current, rq);
@@ -4091,6 +4128,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 		break;
 	}
@@ -4116,6 +4154,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 	}
 	return ret;
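
For reference, here is a minimal userspace sketch of how a task could opt into the new
policy once this patch is applied. It is illustrative only and not part of the patch:
the SCHED_IDLEPRIO value (5) is copied from the sched.h hunk above, the file name is
made up, and it assumes glibc's sched_setscheduler() wrapper passes an unrecognised
policy value straight through to the kernel.

/* idleprio-test.c: illustrative sketch only, not part of the patch. */
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

#ifndef SCHED_IDLEPRIO
#define SCHED_IDLEPRIO 5	/* value added to include/linux/sched.h above */
#endif

int main(void)
{
	struct sched_param param;

	memset(&param, 0, sizeof(param));
	param.sched_priority = 0;	/* non-realtime policies use priority 0 */

	/*
	 * Ask for the idleprio policy.  Per the recheck hunk above, kernel
	 * threads (no mm) get -EINVAL, but an ordinary userspace process
	 * passes the SCHED_RANGE() check on a patched kernel.
	 */
	if (sched_setscheduler(0, SCHED_IDLEPRIO, &param) == -1) {
		fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));
		return 1;
	}

	/*
	 * effective_prio() now keeps this task at MAX_PRIO-1, so the busy
	 * loop below should only consume CPU the rest of the system leaves
	 * idle (charged as nice time per the account_user_time() hunk).
	 */
	for (;;)
		;
}

The expected behaviour follows from the hunks above: the policy is accepted because
SCHED_RANGE() now extends to 5, kernel threads are refused with -EINVAL, and
effective_prio() queues the task at MAX_PRIO-1 so it only receives CPU time the
other policies leave unused.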