Add the SCHED_IDLEPRIO scheduling policy. Tasks set to this policy are given
CPU time only when no other task at all wants the CPU, so they effectively run
at idle priority. If such a task holds semaphores or mutexes, or the system is
going into suspend, it is scheduled as SCHED_NORMAL at nice 19.

Bugfixes by Sergio Cerlesi & Serge Belyshev.

A brief userspace usage sketch is appended after the patch.

Signed-off-by: Con Kolivas

 include/linux/init_task.h |    6 ++--
 include/linux/sched.h     |    9 ++++--
 kernel/sched.c            |   68 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 70 insertions(+), 13 deletions(-)

Index: linux-2.6.20-ck1/include/linux/init_task.h
===================================================================
--- linux-2.6.20-ck1.orig/include/linux/init_task.h	2007-02-16 19:01:31.000000000 +1100
+++ linux-2.6.20-ck1/include/linux/init_task.h	2007-02-16 19:01:31.000000000 +1100
@@ -99,9 +99,9 @@ extern struct group_info init_groups;
 	.usage		= ATOMIC_INIT(2),		\
 	.flags		= 0,				\
 	.lock_depth	= -1,				\
-	.prio		= MAX_PRIO-20,			\
-	.static_prio	= MAX_PRIO-20,			\
-	.normal_prio	= MAX_PRIO-20,			\
+	.prio		= MAX_PRIO-21,			\
+	.static_prio	= MAX_PRIO-21,			\
+	.normal_prio	= MAX_PRIO-21,			\
 	.policy		= SCHED_NORMAL,			\
 	.cpus_allowed	= CPU_MASK_ALL,			\
 	.mm		= NULL,				\
Index: linux-2.6.20-ck1/include/linux/sched.h
===================================================================
--- linux-2.6.20-ck1.orig/include/linux/sched.h	2007-02-16 19:01:31.000000000 +1100
+++ linux-2.6.20-ck1/include/linux/sched.h	2007-02-16 19:01:31.000000000 +1100
@@ -35,10 +35,11 @@
 #define SCHED_RR		2
 #define SCHED_BATCH		3
 #define SCHED_ISO		4
+#define SCHED_IDLEPRIO		5
 
 #ifdef __KERNEL__
 
-#define SCHED_MAX		SCHED_ISO
+#define SCHED_MAX		SCHED_IDLEPRIO
 #define SCHED_RANGE(policy)	((policy) <= SCHED_MAX)
 
 struct sched_param {
@@ -529,8 +530,9 @@ struct signal_struct {
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 #define ISO_PRIO		(MAX_RT_PRIO - 1)
 
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define MIN_USER_PRIO		(MAX_PRIO - 1)
+#define MAX_PRIO		(MAX_RT_PRIO + 41)
+#define MIN_USER_PRIO		(MAX_PRIO - 2)
+#define IDLEPRIO_PRIO		(MAX_PRIO - 1)
 
 #define rt_prio(prio)		unlikely((prio) < ISO_PRIO)
 #define rt_task(p)		rt_prio((p)->prio)
@@ -539,6 +541,7 @@ struct signal_struct {
 					(policy) == SCHED_RR)
 #define has_rt_policy(p)	unlikely(is_rt_policy((p)->policy))
 #define iso_task(p)		(unlikely((p)->policy == SCHED_ISO))
+#define idleprio_task(p)	(unlikely((p)->policy == SCHED_IDLEPRIO))
 
 /*
  * Some day this will be a full-fledged user tracking system..
Index: linux-2.6.20-ck1/kernel/sched.c
===================================================================
--- linux-2.6.20-ck1.orig/kernel/sched.c	2007-02-16 19:01:31.000000000 +1100
+++ linux-2.6.20-ck1/kernel/sched.c	2007-02-16 19:01:31.000000000 +1100
@@ -696,6 +696,12 @@ static void set_load_weight(struct task_
 		else
 #endif
 			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
+	} else if (idleprio_task(p)) {
+		/*
+		 * We want idleprio_tasks to have a presence on weighting but
+		 * as small as possible
+		 */
+		p->load_weight = 1;
 	} else
 		p->load_weight = TASK_LOAD_WEIGHT(p);
 }
@@ -872,6 +878,17 @@ static inline void recalc_task_prio(stru
 		continue_slice(p);
 }
 
+static inline int idleprio_suitable(struct task_struct *p)
+{
+	return (!p->mutexes_held && !freezing(p) &&
+		!(p->flags & (PF_NONSLEEP | PF_EXITING)));
+}
+
+static inline int idleprio(const struct task_struct *p)
+{
+	return (p->prio == IDLEPRIO_PRIO);
+}
+
 /*
  * __normal_prio - dynamic priority dependent on bonus.
  * The priority normally decreases by one each RR_INTERVAL.
@@ -895,6 +912,18 @@ static inline int __normal_prio(struct t
 			return ISO_PRIO;
 	}
 
+	if (idleprio_task(p)) {
+		if (unlikely(!idleprio_suitable(p))) {
+			/*
+			 * If idleprio tasks are holding a semaphore, mutex,
+			 * or being frozen, schedule at a normal priority.
+			 */
+			p->time_slice = p->slice % RR_INTERVAL ? : RR_INTERVAL;
+			return MIN_USER_PRIO;
+		}
+		return IDLEPRIO_PRIO;
+	}
+
 	full_slice = slice(p);
 	if (full_slice > p->slice)
 		used_slice = full_slice - p->slice;
@@ -1544,6 +1573,8 @@ out_activate:
 out_running:
 	p->state = TASK_RUNNING;
 out:
+	if (idleprio_task(p) && freezing(p) && idleprio(p))
+		requeue_task(p, rq, effective_prio(p));
 	task_rq_unlock(rq, &flags);
 
 	return success;
@@ -2941,7 +2972,7 @@ void account_user_time(struct task_struc
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
-	if (TASK_NICE(p) > 0)
+	if (TASK_NICE(p) > 0 || idleprio_task(p))
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -3051,9 +3082,12 @@ static void task_running_tick(struct rq
 		} else
 			p->flags &= ~PF_ISOREF;
 	} else {
-		/* SCHED_FIFO tasks never run out of timeslice. */
-		if (unlikely(p->policy == SCHED_FIFO))
-			goto out_unlock;
+		if (idleprio_task(p) && !idleprio(p) && idleprio_suitable(p))
+			set_tsk_need_resched(p);
+		else
+		/* SCHED_FIFO tasks never run out of timeslice. */
+		if (unlikely(p->policy == SCHED_FIFO))
+			goto out_unlock;
 	}
 
 	debit = ns_diff(rq->most_recent_timestamp, p->timestamp);
@@ -3219,11 +3253,23 @@ dependent_sleeper(int this_cpu, struct r
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
+			else if (idleprio(p))
+				ret = 1;
 		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > slice(p))
 					ret = 1;
+			else if (idleprio(p) && !idleprio_task(smt_curr) &&
+				smt_curr->slice * sd->per_cpu_gain >
+				slice(smt_curr)) {
+				/*
+				 * With idleprio tasks they run just the last
+				 * per_cpu_gain percent of the smt task's
+				 * slice.
+				 */
+				ret = 1;
+			}
 		}
 unlock:
 		spin_unlock(&smt_rq->lock);
@@ -3884,8 +3930,9 @@ void set_user_nice(struct task_struct *p
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(rq, p)))
-			resched_task(rq->curr);
+		if (delta < 0 || ((delta > 0 || idleprio_task(p)) &&
+			task_running(rq, p)))
+				resched_task(rq->curr);
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -4086,6 +4133,11 @@ recheck:
 			return -EPERM;
 	}
 
+	if (!(p->mm) && policy == SCHED_IDLEPRIO) {
+		/* Don't allow kernel threads to be SCHED_IDLEPRIO. */
+		return -EINVAL;
+	}
+
 	retval = security_task_setscheduler(p, policy, param);
 	if (retval)
 		return retval;
@@ -4407,7 +4459,7 @@ asmlinkage long sys_sched_yield(void)
 	schedstat_inc(rq, yld_cnt);
 	current->slice = slice(current);
 	current->time_slice = rr_interval(current);
-	if (likely(!rt_task(current)))
+	if (likely(!rt_task(current) && !idleprio(current)))
 		newprio = MIN_USER_PRIO;
 	requeue_task(current, rq, newprio);
 
@@ -4564,6 +4616,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 		break;
 	}
@@ -4589,6 +4642,7 @@ asmlinkage long sys_sched_get_priority_m
 	case SCHED_NORMAL:
 	case SCHED_BATCH:
 	case SCHED_ISO:
+	case SCHED_IDLEPRIO:
 		ret = 0;
 	}
 	return ret;
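
Usage sketch (illustrative only, not part of the patch): a minimal userspace
program that puts itself into the new policy. glibc headers do not define
SCHED_IDLEPRIO, so the constant is repeated locally from the sched.h hunk
above; as with the other non-realtime policies, sched_priority must be 0. Per
the changelog, the process then only receives CPU time when the CPU would
otherwise be idle, and is treated as SCHED_NORMAL nice 19 while it holds
mutexes/semaphores or during suspend.

	#include <sched.h>
	#include <stdio.h>

	#ifndef SCHED_IDLEPRIO
	#define SCHED_IDLEPRIO	5	/* value added to include/linux/sched.h above */
	#endif

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 0 };

		/* pid 0 means the calling process */
		if (sched_setscheduler(0, SCHED_IDLEPRIO, &sp) == -1) {
			perror("sched_setscheduler");
			return 1;
		}

		/* From here on we only run when nothing else wants the CPU. */
		return 0;
	}

Note that, per the setscheduler change above, kernel threads (tasks without an
mm) are refused SCHED_IDLEPRIO with -EINVAL, and on an unpatched kernel the
call simply fails with EINVAL.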