sched: disable IPIs while polling wakeup queue before idle
The scheduler polls the wakeup queue when idle for a short time before HLTing
in order to avoid the expensive HLT instruction if a wakeup arrives early.
This patch extends this to also disable remote wakeups during the polling
period, reducing the waking cpu's need to issue an IPI, which requires an
exit. This helps synchronous multithreaded workloads, where threads block
and wake each other.
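
In case the memory-ordering argument is easier to follow outside the diff, here is a minimal, self-contained sketch of the protocol the patch relies on. This is illustrative only, not OSv code: `queued` stands in for the incoming wakeup queue, `ipis_sent` for wakeup_ipi.send(), and `waker`/`idler` model the two cpus.

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<bool> idle_poll{false};   // mirrors cpu::idle_poll
std::atomic<bool> queued{false};      // stand-in for the incoming wakeup queue
std::atomic<int>  ipis_sent{0};       // counts the "IPIs" that could not be avoided

void waker()                          // cpu::send_wakeup_ipi() side
{
    queued.store(true, std::memory_order_relaxed);        // enqueue the wakeup
    std::atomic_thread_fence(std::memory_order_seq_cst);  // publish it before checking the flag
    if (!idle_poll.load(std::memory_order_relaxed)) {
        ipis_sent.fetch_add(1);                           // target is not polling: must IPI
    }                                                     // else: the polling cpu will see it
}

void idler()                          // cpu::do_idle() side
{
    idle_poll.store(true, std::memory_order_relaxed);     // idle_poll_start()
    for (int ctr = 0; ctr < 100 && !queued.load(std::memory_order_relaxed); ++ctr) {
        // polling window: wakeups arriving now need no IPI
    }
    idle_poll.store(false, std::memory_order_relaxed);     // idle_poll_end()
    std::atomic_thread_fence(std::memory_order_seq_cst);
    bool may_halt = !queued.load(std::memory_order_relaxed);
    // The two fences pair up (store-buffering pattern): if the waker skipped
    // the IPI because it saw idle_poll == true, this final check is guaranteed
    // to see the queued wakeup, so the cpu never halts past a pending wakeup.
    std::printf("may halt: %d\n", may_halt);
}

int main()
{
    std::thread a(idler), b(waker);
    a.join();
    b.join();
    std::printf("IPIs sent: %d\n", ipis_sent.load());
}

Depending on timing, a run either reports an IPI (the waker missed the polling window) or no IPI with may_halt == 0 (the waker hit the window); the fences rule out the lost-wakeup combination of both.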

Together with the following patch, netperf throughput increases from ~17Gbps
to ~19Gbps, and the context switch benchmark improves from

$ run tests/tst-ctxsw.so
       345 colocated
      5761 apart
       633 nopin

to

$ run tests/tst-ctxsw.so
       347 colocated
       598 apart
       467 nopin
avikivity committed Aug 5, 2013
1 parent 3d58de8 commit 032aa93
Showing 2 changed files with 43 additions and 8 deletions.
45 changes: 37 additions & 8 deletions core/sched.cc
@@ -168,15 +168,44 @@ void cpu::timer_fired()
     // nothing to do, preemption will happen if needed
 }
 
+struct idle_poll_lock_type {
+    explicit idle_poll_lock_type(cpu& c) : _c(c) {}
+    void lock() { _c.idle_poll_start(); }
+    void unlock() { _c.idle_poll_end(); }
+    cpu& _c;
+};
+
+void cpu::idle_poll_start()
+{
+    idle_poll.store(true, std::memory_order_relaxed);
+}
+
+void cpu::idle_poll_end()
+{
+    idle_poll.store(false, std::memory_order_relaxed);
+    std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+
+void cpu::send_wakeup_ipi()
+{
+    std::atomic_thread_fence(std::memory_order_seq_cst);
+    if (!idle_poll.load(std::memory_order_relaxed)) {
+        wakeup_ipi.send(this);
+    }
+}
+
 void cpu::do_idle()
 {
     do {
-        // spin for a bit before halting
-        for (unsigned ctr = 0; ctr < 100; ++ctr) {
-            // FIXME: can we pull threads from loaded cpus?
-            handle_incoming_wakeups();
-            if (!runqueue.empty()) {
-                return;
+        idle_poll_lock_type idle_poll_lock{*this};
+        WITH_LOCK(idle_poll_lock) {
+            // spin for a bit before halting
+            for (unsigned ctr = 0; ctr < 100; ++ctr) {
+                // FIXME: can we pull threads from loaded cpus?
+                handle_incoming_wakeups();
+                if (!runqueue.empty()) {
+                    return;
+                }
             }
         }
         std::unique_lock<irq_lock_type> guard(irq_lock);
@@ -289,7 +318,7 @@ void cpu::load_balance()
             min->incoming_wakeups_mask.set(id);
             // FIXME: avoid if the cpu is alive and if the priority does not
             // FIXME: warrant an interruption
-            wakeup_ipi.send(min);
+            min->send_wakeup_ipi();
         }
     }
 }
@@ -493,7 +522,7 @@ void thread::wake()
     // FIXME: avoid if the cpu is alive and if the priority does not
     // FIXME: warrant an interruption
     if (_cpu != current()->tcpu()) {
-        wakeup_ipi.send(_cpu);
+        _cpu->send_wakeup_ipi();
     } else {
         need_reschedule = true;
     }
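A note on the do_idle() hunk above: the flag is toggled through a lock-shaped type rather than plain start/end calls so that the scoped-lock block runs idle_poll_end() on every exit path, including the early return when a wakeup is found. A rough standalone equivalent, assuming only that WITH_LOCK behaves like a scoped lock over the object it is given (cpu stub, `queued`, and `poll_for_wakeup` are invented for illustration):

#include <atomic>
#include <cstdio>
#include <mutex>

struct cpu {                              // minimal stand-in for sched::cpu
    std::atomic<bool> idle_poll{false};
    void idle_poll_start() { idle_poll.store(true, std::memory_order_relaxed); }
    void idle_poll_end()
    {
        idle_poll.store(false, std::memory_order_relaxed);
        std::atomic_thread_fence(std::memory_order_seq_cst);
    }
};

struct idle_poll_lock_type {              // same shape as the type added above
    explicit idle_poll_lock_type(cpu& c) : _c(c) {}
    void lock() { _c.idle_poll_start(); }
    void unlock() { _c.idle_poll_end(); }
    cpu& _c;
};

bool poll_for_wakeup(cpu& c, const std::atomic<bool>& queued)
{
    idle_poll_lock_type idle_poll_lock{c};
    std::lock_guard<idle_poll_lock_type> guard(idle_poll_lock);
    for (unsigned ctr = 0; ctr < 100; ++ctr) {
        if (queued.load(std::memory_order_relaxed)) {
            return true;                  // early exit still runs idle_poll_end()
        }
    }
    return false;                         // normal exit does too
}

int main()
{
    cpu c;
    std::atomic<bool> queued{true};
    std::printf("found wakeup: %d, still polling: %d\n",
                (int)poll_for_wakeup(c, queued), (int)c.idle_poll.load());
}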
6 changes: 6 additions & 0 deletions include/sched.hh
@@ -313,6 +313,8 @@ struct cpu : private timer_base::client {
     timer_list timers;
     timer_base preemption_timer;
     thread* idle_thread;
+    // if true, cpu is now polling incoming_wakeups_mask
+    std::atomic<bool> idle_poll = { false };
     // for each cpu, a list of threads that are migrating into this cpu:
     typedef lockless_queue<thread, &thread::_wakeup_link> incoming_wakeup_queue;
     cpu_set incoming_wakeups_mask;
@@ -324,8 +326,12 @@ struct cpu : private timer_base::client {
     void init_on_cpu();
     void schedule(bool yield = false);
     void handle_incoming_wakeups();
+    bool poll_wakeup_queue();
     void idle();
     void do_idle();
+    void idle_poll_start();
+    void idle_poll_end();
+    void send_wakeup_ipi();
     void load_balance();
     unsigned load();
     void reschedule_from_interrupt(bool preempt = false);
