Demotion reloaded #2

Merged: 13 commits, Jul 28, 2017

Changes from 1 commit

2 changes: 1 addition & 1 deletion Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 4
SUBLEVEL = 43
EXTRAVERSION = -HCBS
EXTRAVERSION = -HCBS-Demotion
NAME = Blurry Fish Butt

# *DOCUMENTATION*
5 changes: 5 additions & 0 deletions include/linux/sched/rt.h
@@ -15,6 +15,11 @@ static inline int rt_task(struct task_struct *p)
return rt_prio(p->prio);
}

static inline int rt_throttled(struct task_struct *p)
{
return !list_empty(&p->rt.cfs_throttled_task);
}

#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
90 changes: 89 additions & 1 deletion kernel/sched/core.c
@@ -1094,7 +1094,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
int oldprio)
{
if (prev_class != p->sched_class) {
if (prev_class != p->sched_class || rt_throttled(p)) {
if (prev_class->switched_from)
prev_class->switched_from(rq, p);

@@ -3632,6 +3632,94 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
}
EXPORT_SYMBOL(default_wake_function);

void __setprio_other(struct rq *rq, struct task_struct *p)
{
int oldprio, queued, running;
const struct sched_class *prev_class;

lockdep_assert_held(&rq->lock);

oldprio = p->prio;
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
running = task_current(rq, p);
BUG_ON(!rt_throttled(p));

if (queued)
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE);
/*
if (running)
put_prev_task(rq, p);
*/
Owner:

Why is this commented out?

Author:

I replied to the email, but I do not see replies here... So, here it is again:
this function is invoked by cfs_throttle_rt_tasks(), which is in turn invoked by update_curr_rt().
Invoking put_prev_task() would result in another invocation of update_curr_rt(), potentially causing some issues.

Owner:

OK. Also, considering that put_prev_task_rt() would only enqueue the task in the pushable list (and we don't want that), it seems safe to remove it.

Author:

Now I remember: I removed it because of a crash I was seeing, which I concluded was caused by infinite recursion (update_curr_rt -> cfs_throttle_rt_tasks -> __setprio_fifo -> put_prev_task_rt -> update_curr_rt -> cfs_throttle_rt_tasks -> ...).
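
[Editor's note] To make the hazard concrete, here is a minimal user-space sketch of that cycle. It is illustrative only: the names merely mirror the kernel functions described in the comment above, and a depth counter stands in for the kernel stack overflowing.

/* recursion_sketch.c - toy model of the cycle described above, not kernel code */
#include <stdio.h>

static int depth;

static void update_curr_rt(void);

static void put_prev_task_rt(void)
{
	/* the real put_prev_task_rt() ends up updating runtime statistics again */
	update_curr_rt();
}

static void setprio_demote(void)
{
	/* keeping the put_prev_task() call here is what closes the loop */
	put_prev_task_rt();
}

static void cfs_throttle_rt_tasks(void)
{
	setprio_demote();
}

static void update_curr_rt(void)
{
	if (++depth > 8) {
		/* stand-in for the kernel stack blowing up */
		printf("unbounded recursion: %d frames deep\n", depth);
		return;
	}
	cfs_throttle_rt_tasks();
}

int main(void)
{
	update_curr_rt();
	return 0;
}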

p->sched_class = &fair_sched_class;
p->prio = DEFAULT_PRIO;

/*
* As in attach_task_cfs_rq(): since the real depth could have changed
* (only the FAIR class maintains a depth value), reset depth properly.
*/
p->se.depth = p->se.parent ? p->se.parent->depth + 1 : 0;

if (running)
p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_MOVE);

check_class_changed(rq, p, prev_class, oldprio);
}

void __setprio_fifo(struct rq *rq, struct task_struct *p)
{
int oldprio, queued, running, cpu;
const struct sched_class *prev_class;
unsigned int count = 0;

lockdep_assert_held(&rq->lock);

/*
* p might have migrated while hanging out in OTHER. We will need its
* current rq lock for dequeue_task/put_prev_task.
*/
again:
cpu = task_cpu(p);
if (cpu != cpu_of(rq)) {
double_lock_balance(rq, cpu_rq(cpu));
if (cpu != task_cpu(p)) {
double_unlock_balance(rq, cpu_rq(cpu));
count++;
BUG_ON(count > 10);
goto again;
}
}

BUG_ON(p->sched_class == &rt_sched_class);

oldprio = p->prio;
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
running = task_current(cpu_rq(cpu), p);
BUG_ON(rt_throttled(p));

if (queued)
dequeue_task(cpu_rq(cpu), p, DEQUEUE_SAVE | DEQUEUE_MOVE);
if (running)
put_prev_task(cpu_rq(cpu), p);

p->sched_class = &rt_sched_class;
p->prio = (MAX_RT_PRIO - 1) - p->rt_priority;

if (running)
p->sched_class->set_curr_task(cpu_rq(cpu));
if (queued)
enqueue_task(cpu_rq(cpu), p, ENQUEUE_REPLENISH | ENQUEUE_MOVE | ENQUEUE_RESTORE);

check_class_changed(cpu_rq(cpu), p, prev_class, oldprio);
out:
Owner:

This label doesn't seem to be used.

Author:

Uhmm... Right. I suspect it is a leftover from some previous change; I am going to check.

Author:

Ok, I checked: your original patch contained

    if (p->sched_class == &rt_sched_class)
            goto out;

near the beginning of __setprio_fifo(). Since I think that entering __setprio_fifo() with sched_class == rt_sched_class should never happen, I changed this to

    BUG_ON(p->sched_class == &rt_sched_class);

but I forgot to remove the "out:" label.

if (cpu != cpu_of(rq))
double_unlock_balance(rq, cpu_rq(cpu));
}

#ifdef CONFIG_RT_MUTEXES

/*
3 changes: 3 additions & 0 deletions kernel/sched/deadline.c
@@ -621,15 +621,18 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
sched_clock_tick();
update_rq_clock(rq);

cfs_unthrottle_rt_tasks(rt_rq);
dl_se->dl_throttled = 0;
if (rt_rq->rt_nr_running) {
enqueue_dl_entity(dl_se, dl_se, ENQUEUE_REPLENISH);

resched_curr(rq);
/*
#ifdef CONFIG_SMP
if (has_pushable_dl_tasks(rq))
push_dl_task(rq);
#endif
*/
} else {
replenish_dl_entity(dl_se, dl_se);
}
32 changes: 32 additions & 0 deletions kernel/sched/fair.c
@@ -6088,9 +6088,33 @@ static void migrate_task_rq_fair(struct task_struct *p)
p->se.exec_start = 0;
}

#ifdef CONFIG_RT_GROUP_SCHED
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
}
#else
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
}
#endif /* CONFIG_RT_GROUP_SCHED */

static void task_dead_fair(struct task_struct *p)
{
remove_entity_load_avg(&p->se);

/*
* p got killed while hanging out in RT.
* Remove it from throttled_task list.
*/
if (rt_throttled(p)) {
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

lockdep_assert_held(&rq_of_rt_rq(rt_rq)->lock);
list_del_init(&rt_se->cfs_throttled_task);
}
}
#else
#define task_fits_max(p, cpu) true
@@ -9296,6 +9320,14 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)

static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
if (!rt_prio(p->normal_prio) && rt_throttled(p)) {
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

lockdep_assert_held(&rq_of_rt_rq(rt_rq)->lock);
list_del_init(&rt_se->cfs_throttled_task);
}

attach_task_cfs_rq(p);

if (task_on_rq_queued(p)) {
99 changes: 96 additions & 3 deletions kernel/sched/rt.c
@@ -339,6 +339,72 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
return rt_task_of(rt_se)->prio;
}

/*
* Iterates through all the tasks on @rt_rq and demotes them from FIFO
* to OTHER.
*/
static void cfs_throttle_rt_tasks(struct rt_rq *rt_rq)
{
struct rt_prio_array *array = &rt_rq->active;
struct rq *rq = rq_of_rt_rq(rt_rq);
int idx;
struct sched_rt_entity *sleep_se = NULL;

if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
return;

idx = sched_find_first_bit(array->bitmap);
while (idx < MAX_RT_PRIO) {
while (!list_empty(array->queue + idx)) {
struct sched_rt_entity *rt_se;
struct task_struct *p;

rt_se = list_first_entry(array->queue + idx,
struct sched_rt_entity,
run_list);

if (sleep_se == rt_se)
break;

p = rt_task_of(rt_se);
/*
* Don't enqueue in fair if the task is going
* to sleep. We'll handle the transition at
* wakeup time eventually.
*/
if (p->state != TASK_RUNNING) {
/* Only one curr */
BUG_ON(sleep_se);
sleep_se = rt_se;
continue;
}

list_add(&rt_se->cfs_throttled_task,
&rt_rq->cfs_throttled_tasks);
__setprio_other(rq, p);
}
idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
}
}

void cfs_unthrottle_rt_tasks(struct rt_rq *rt_rq)
{
struct rq *rq = rq_of_rt_rq(rt_rq);

while (!list_empty(&rt_rq->cfs_throttled_tasks)) {
struct sched_rt_entity *rt_se;
struct task_struct *p;

rt_se = list_first_entry(&rt_rq->cfs_throttled_tasks,
struct sched_rt_entity,
cfs_throttled_task);

p = rt_task_of(rt_se);
list_del_init(&rt_se->cfs_throttled_task);
__setprio_fifo(rq, p);
}
}

/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
@@ -388,9 +454,10 @@ static void update_curr_rt(struct rq *rq)
if (dl_runtime_exceeded(dl_se)) {
dequeue_dl_entity(dl_se);

if (likely(start_dl_timer(dl_se)))
if (likely(start_dl_timer(dl_se))) {
dl_se->dl_throttled = 1;
else
cfs_throttle_rt_tasks(rt_rq);
} else
enqueue_dl_entity(dl_se, dl_se,
ENQUEUE_REPLENISH);

@@ -608,9 +675,31 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
if (!dl_se->dl_throttled) {
enqueue_dl_entity(dl_se, dl_se, flags);
resched_curr(rq);
} else {
BUG_ON(rt_throttled(p));
/*
* rt_se's group was throttled while this task was
* sleeping/blocked/migrated.
*
* Do the transition towards OTHER now.
*/
if ((flags & ENQUEUE_REPLENISH) == 0) {
BUG_ON(on_rt_rq(rt_se));
lockdep_assert_held(&rq->lock);

list_add(&rt_se->cfs_throttled_task,
&rt_rq->cfs_throttled_tasks);
p->sched_class = &fair_sched_class;
p->prio = DEFAULT_PRIO;
p->sched_class->enqueue_task(rq, p, flags);
p->sched_class->switched_to(rq, p);

return;
}
}
}

BUG_ON(p->sched_class != &rt_sched_class);
enqueue_rt_entity(rt_se, flags);
walt_inc_cumulative_runnable_avg(rq, p);

@@ -623,7 +712,9 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

update_curr_rt(rq);
if (!rt_throttled(p))
update_curr_rt(rq);
BUG_ON(p->sched_class != &rt_sched_class);
dequeue_rt_entity(rt_se, flags);
walt_dec_cumulative_runnable_avg(rq, p);

@@ -1554,6 +1645,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
struct rt_rq *rt_rq = rt_rq_of_se(&p->rt);

BUG_ON(task_cpu(p) != cpu_of(rq));

/*
* If there are other RT tasks then we will reschedule
* and the scheduling of the other RT tasks will handle
4 changes: 4 additions & 0 deletions kernel/sched/sched.h
@@ -785,6 +785,10 @@ static inline void rq_clock_skip_update(struct rq *rq, bool skip)
rq->clock_skip_update &= ~RQCF_REQ_SKIP;
}

void __setprio_other(struct rq *rq, struct task_struct *p);
void __setprio_fifo(struct rq *rq, struct task_struct *p);
void cfs_unthrottle_rt_tasks(struct rt_rq *rt_rq);

#ifdef CONFIG_NUMA
enum numa_topology_type {
NUMA_DIRECT,