CoreTiming: Reworked CoreTiming (cherry-picked from Citra #3119)

* CoreTiming: New CoreTiming; Add Test for CoreTiming
2017-11-25 14:56:57 +07:00 · 2017-11-25 14:56:57 +07:00 · 82151d407d
parent c12c756539
commit 82151d407d
12 changed files with 665 additions and 584 deletions
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@ -18,7 +18,7 @@
 namespace AudioCore {

 // Audio Ticks occur about every 5 miliseconds.
-static int tick_event;                               ///< CoreTiming event
+static CoreTiming::EventType* tick_event;            ///< CoreTiming event
 static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles

 static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@ -76,6 +76,7 @@ set(HEADERS
            telemetry.h
            thread.h
            thread_queue_list.h
+            threadsafe_queue.h
            timer.h
            vector_math.h
            )
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@ -0,0 +1,122 @@
+// Copyright 2010 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+// a simple lockless thread-safe,
+// single reader, single writer queue
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <mutex>
+#include "common/common_types.h"
+
+namespace Common {
+// Lock-free queue for exactly one producer thread (Push) and one consumer thread
+// (Empty/Front/Pop). Elements live in a singly linked list that always ends in an empty
+// sentinel node; NeedSize selects whether an element count is maintained for Size().
+template <typename T, bool NeedSize = true>
+class SPSCQueue {
+public:
+    SPSCQueue() : size(0) {
+        write_ptr = read_ptr = new ElementPtr();
+    }
+    ~SPSCQueue() {
+        // this will empty out the whole queue
+        DeleteChain(read_ptr);
+    }
+
+    // The queue owns raw node pointers, so a copy would free every node twice.
+    SPSCQueue(const SPSCQueue&) = delete;
+    SPSCQueue& operator=(const SPSCQueue&) = delete;
+
+    // Number of queued elements; only available when NeedSize is true.
+    u32 Size() const {
+        static_assert(NeedSize, "using Size() on SPSCQueue without NeedSize");
+        return size.load();
+    }
+
+    // True when only the sentinel node is left, i.e. there is nothing to pop.
+    bool Empty() const {
+        return !read_ptr->next.load();
+    }
+    // Reference to the oldest element; only meaningful while the queue is not empty.
+    T& Front() const {
+        return read_ptr->current;
+    }
+    // Appends an element at the writer end of the queue.
+    template <typename Arg>
+    void Push(Arg&& t) {
+        // fill the current sentinel node with the element
+        write_ptr->current = std::forward<Arg>(t);
+        // link a fresh sentinel behind it, then advance the write pointer; the release store
+        // publishes `current` to the reader's loads of `next`
+        ElementPtr* new_ptr = new ElementPtr();
+        write_ptr->next.store(new_ptr, std::memory_order_release);
+        write_ptr = new_ptr;
+        if (NeedSize)
+            size++;
+    }
+
+    // Discards the oldest element. The queue must not be empty: popping the sentinel would
+    // leave read_ptr null.
+    void Pop() {
+        if (NeedSize)
+            size--;
+        ElementPtr* tmpptr = read_ptr;
+        // advance the read pointer
+        read_ptr = tmpptr->next.load();
+        delete tmpptr; // this also deletes the element
+    }
+
+    // Moves the oldest element into t. Returns false (leaving t untouched) if the queue is empty.
+    bool Pop(T& t) {
+        if (Empty())
+            return false;
+
+        if (NeedSize)
+            size--;
+
+        ElementPtr* tmpptr = read_ptr;
+        read_ptr = tmpptr->next.load(std::memory_order_acquire);
+        t = std::move(tmpptr->current);
+        delete tmpptr;
+        return true;
+    }
+
+    // not thread-safe
+    void Clear() {
+        size.store(0);
+        DeleteChain(read_ptr);
+        write_ptr = read_ptr = new ElementPtr();
+    }
+
+private:
+    // list node: stores one element and a pointer to the next node
+    class ElementPtr {
+    public:
+        ElementPtr() : next(nullptr) {}
+
+        T current;
+        std::atomic<ElementPtr*> next;
+    };
+
+    // Frees node and everything linked after it. Iterative on purpose: a recursive node
+    // destructor would use one stack frame per queued element.
+    static void DeleteChain(ElementPtr* node) {
+        while (node) {
+            ElementPtr* following = node->next.load();
+            delete node;
+            node = following;
+        }
+    }
+
+    ElementPtr* write_ptr;
+    ElementPtr* read_ptr;
+    std::atomic<u32> size;
+};
+
+// A simple thread-safe, single reader, multiple writer queue. Writers are serialized by a
+// mutex around the inherited SPSCQueue::Push(); the reader-side members are inherited as-is.
+// Push() forwards its argument so rvalues are moved into the queue instead of being copied.
+template <typename T, bool NeedSize = true>
+class MPSCQueue : public SPSCQueue<T, NeedSize> {
+public:
+    template <typename Arg>
+    void Push(Arg&& t) {
+        std::lock_guard<std::mutex> lock(write_lock);
+        SPSCQueue<T, NeedSize>::Push(std::forward<Arg>(t));
+    }
+
+private:
+    std::mutex write_lock;
+};
+} // namespace Common
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -54,6 +54,7 @@ System::ResultStatus System::RunLoop(int tight_loop) {
        CoreTiming::Advance();
        PrepareReschedule();
    } else {
+        CoreTiming::Advance();
        cpu_core->Run(tight_loop);
    }

--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@ -1,562 +1,238 @@
-// Copyright (c) 2012- PPSSPP Project / Dolphin Project.
-// Licensed under GPLv2 or any later version
+// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
+// Licensed under GPLv2+
 // Refer to the license.txt file included.

-#include <atomic>
-#include <cinttypes>
-#include <mutex>
-#include <vector>
-#include "common/chunk_file.h"
-#include "common/logging/log.h"
-#include "common/string_util.h"
-#include "core/arm/arm_interface.h"
-#include "core/core.h"
 #include "core/core_timing.h"

-int g_clock_rate_arm11 = BASE_CLOCK_RATE;
-
-// is this really necessary?
-#define INITIAL_SLICE_LENGTH 20000
-#define MAX_SLICE_LENGTH 100000000
+#include <algorithm>
+#include <cinttypes>
+#include <mutex>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/thread.h"
+#include "common/threadsafe_queue.h"

 namespace CoreTiming {
-struct EventType {
-    EventType() {}
-
-    EventType(TimedCallback cb, const char* n) : callback(cb), name(n) {}
-
-    TimedCallback callback;
-    const char* name;
-};
-
-static std::vector<EventType> event_types;
-
-struct BaseEvent {
-    s64 time;
-    u64 userdata;
-    int type;
-};
-
-typedef LinkedListItem<BaseEvent> Event;
-
-static Event* first;
-static Event* ts_first;
-static Event* ts_last;
-
-// event pools
-static Event* event_pool = nullptr;
-static Event* event_ts_pool = nullptr;
-static int allocated_ts_events = 0;
-// Optimization to skip MoveEvents when possible.
-static std::atomic<bool> has_ts_events(false);
-
-int g_slice_length;

 static s64 global_timer;
+static int slice_length;
+static int downcount;
+
+struct EventType {
+    TimedCallback callback;
+    const std::string* name;
+};
+
+struct Event {
+    s64 time;
+    u64 fifo_order;
+    u64 userdata;
+    const EventType* type;
+};
+
+// Sort by time, unless the times are the same, in which case sort by the order added to the queue
+static bool operator>(const Event& left, const Event& right) {
+    return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
+}
+
+static bool operator<(const Event& left, const Event& right) {
+    return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
+}
+
+// unordered_map stores each element separately as a linked list node so pointers to elements
+// remain stable regardless of rehashes/resizing.
+static std::unordered_map<std::string, EventType> event_types;
+
+// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+// We don't use std::priority_queue because we need to be able to serialize, deserialize and
+// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accommodated
+// by the standard adaptor class.
+static std::vector<Event> event_queue;
+static u64 event_fifo_id;
+// the queue for storing the events from other threads threadsafe until they will be added
+// to the event_queue by the emu thread
+static Common::MPSCQueue<Event, false> ts_queue;
+
+static constexpr int MAX_SLICE_LENGTH = 20000;
+
 static s64 idled_cycles;
-static s64 last_global_time_ticks;
-static s64 last_global_time_us;

-static s64 down_count = 0; ///< A decreasing counter of remaining cycles before the next event,
-                           /// decreased by the cpu run loop
+// Are we in a function that has been called from Advance()
+// If events are scheduled from a function that gets called from Advance(),
+// don't change slice_length and downcount.
+static bool is_global_timer_sane;

-static std::recursive_mutex external_event_section;
+static EventType* ev_lost = nullptr;

-// Warning: not included in save state.
-using AdvanceCallback = void(int cycles_executed);
-static AdvanceCallback* advance_callback = nullptr;
-static std::vector<MHzChangeCallback> mhz_change_callbacks;
+static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}

-static void FireMhzChange() {
-    for (auto callback : mhz_change_callbacks)
-        callback();
-}
+EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
+    // check for existing type with same name.
+    // we want event type names to remain unique so that we can use them for serialization.
+    ASSERT_MSG(event_types.find(name) == event_types.end(),
+               "CoreTiming Event \"%s\" is already registered. Events should only be registered "
+               "during Init to avoid breaking save states.",
+               name.c_str());

-void SetClockFrequencyMHz(int cpu_mhz) {
-    // When the mhz changes, we keep track of what "time" it was before hand.
-    // This way, time always moves forward, even if mhz is changed.
-    last_global_time_us = GetGlobalTimeUs();
-    last_global_time_ticks = GetTicks();
-
-    g_clock_rate_arm11 = cpu_mhz * 1000000;
-    // TODO: Rescale times of scheduled events?
-
-    FireMhzChange();
-}
-
-int GetClockFrequencyMHz() {
-    return g_clock_rate_arm11 / 1000000;
-}
-
-u64 GetGlobalTimeUs() {
-    s64 ticks_since_last = GetTicks() - last_global_time_ticks;
-    int freq = GetClockFrequencyMHz();
-    s64 us_since_last = ticks_since_last / freq;
-    return last_global_time_us + us_since_last;
-}
-
-static Event* GetNewEvent() {
-    if (!event_pool)
-        return new Event;
-
-    Event* event = event_pool;
-    event_pool = event->next;
-    return event;
-}
-
-static Event* GetNewTsEvent() {
-    allocated_ts_events++;
-
-    if (!event_ts_pool)
-        return new Event;
-
-    Event* event = event_ts_pool;
-    event_ts_pool = event->next;
-    return event;
-}
-
-static void FreeEvent(Event* event) {
-    event->next = event_pool;
-    event_pool = event;
-}
-
-static void FreeTsEvent(Event* event) {
-    event->next = event_ts_pool;
-    event_ts_pool = event;
-    allocated_ts_events--;
-}
-
-int RegisterEvent(const char* name, TimedCallback callback) {
-    event_types.emplace_back(callback, name);
-    return (int)event_types.size() - 1;
-}
-
-static void AntiCrashCallback(u64 userdata, int cycles_late) {
-    LOG_CRITICAL(Core_Timing, "Savestate broken: an unregistered event was called.");
-}
-
-void RestoreRegisterEvent(int event_type, const char* name, TimedCallback callback) {
-    if (event_type >= (int)event_types.size())
-        event_types.resize(event_type + 1, EventType(AntiCrashCallback, "INVALID EVENT"));
-
-    event_types[event_type] = EventType(callback, name);
+    auto info = event_types.emplace(name, EventType{callback, nullptr});
+    EventType* event_type = &info.first->second;
+    event_type->name = &info.first->first;
+    return event_type;
 }

 void UnregisterAllEvents() {
-    if (first)
-        LOG_ERROR(Core_Timing, "Cannot unregister events with events pending");
+    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
    event_types.clear();
 }

 void Init() {
-    down_count = INITIAL_SLICE_LENGTH;
-    g_slice_length = INITIAL_SLICE_LENGTH;
+    downcount = MAX_SLICE_LENGTH;
+    slice_length = MAX_SLICE_LENGTH;
    global_timer = 0;
    idled_cycles = 0;
-    last_global_time_ticks = 0;
-    last_global_time_us = 0;
-    has_ts_events = 0;
-    mhz_change_callbacks.clear();

-    first = nullptr;
-    ts_first = nullptr;
-    ts_last = nullptr;
+    // The time between CoreTiming being initialized and the first call to Advance() is considered
+    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
+    // executing the first cycle of each slice to prepare the slice length and downcount for
+    // that slice.
+    is_global_timer_sane = true;

-    event_pool = nullptr;
-    event_ts_pool = nullptr;
-    allocated_ts_events = 0;
-
-    advance_callback = nullptr;
+    event_fifo_id = 0;
+    ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
 }

 void Shutdown() {
    MoveEvents();
    ClearPendingEvents();
    UnregisterAllEvents();
+}

-    while (event_pool) {
-        Event* event = event_pool;
-        event_pool = event->next;
-        delete event;
-    }
-
-    std::lock_guard<std::recursive_mutex> lock(external_event_section);
-    while (event_ts_pool) {
-        Event* event = event_ts_pool;
-        event_ts_pool = event->next;
-        delete event;
+// This should only be called from the CPU thread. If you are calling
+// it from any other thread, you are doing something evil
+u64 GetTicks() {
+    u64 ticks = static_cast<u64>(global_timer);
+    if (!is_global_timer_sane) {
+        ticks += slice_length - downcount;
    }
+    return ticks;
 }

 void AddTicks(u64 ticks) {
-    down_count -= ticks;
-    if (down_count < 0) {
-        Advance();
-    }
-}
-
-u64 GetTicks() {
-    return (u64)global_timer + g_slice_length - down_count;
+    downcount -= ticks;
 }

 u64 GetIdleTicks() {
-    return (u64)idled_cycles;
-}
-
-// This is to be called when outside threads, such as the graphics thread, wants to
-// schedule things to be executed on the main thread.
-void ScheduleEvent_Threadsafe(s64 cycles_into_future, int event_type, u64 userdata) {
-    std::lock_guard<std::recursive_mutex> lock(external_event_section);
-    Event* new_event = GetNewTsEvent();
-    new_event->time = GetTicks() + cycles_into_future;
-    new_event->type = event_type;
-    new_event->next = nullptr;
-    new_event->userdata = userdata;
-    if (!ts_first)
-        ts_first = new_event;
-    if (ts_last)
-        ts_last->next = new_event;
-    ts_last = new_event;
-
-    has_ts_events = true;
-}
-
-// Same as ScheduleEvent_Threadsafe(0, ...) EXCEPT if we are already on the CPU thread
-// in which case the event will get handled immediately, before returning.
-void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata) {
-    if (false) // Core::IsCPUThread())
-    {
-        std::lock_guard<std::recursive_mutex> lock(external_event_section);
-        event_types[event_type].callback(userdata, 0);
-    } else
-        ScheduleEvent_Threadsafe(0, event_type, userdata);
+    return static_cast<u64>(idled_cycles);
 }

 void ClearPendingEvents() {
-    while (first) {
-        Event* event = first->next;
-        FreeEvent(first);
-        first = event;
+    event_queue.clear();
+}
+
+// Queues event_type to fire cycles_into_future cycles after the current tick count; userdata is
+// stored with the event and handed to the type's callback when it fires.
+void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+    ASSERT(event_type != nullptr);
+    const s64 fire_time = GetTicks() + cycles_into_future;
+
+    // While is_global_timer_sane is false the current slice may run past the new deadline, so
+    // shorten the slice if the event is due before its end.
+    if (!is_global_timer_sane) {
+        ForceExceptionCheck(cycles_into_future);
+    }
+
+    // Append, then sift up to restore the min-heap ordering (earliest time first).
+    event_queue.push_back(Event{fire_time, event_fifo_id++, userdata, event_type});
+    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
+}
+
+void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
+    ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
+}
+
+void UnscheduleEvent(const EventType* event_type, u64 userdata) {
+    auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+        return e.type == event_type && e.userdata == userdata;
+    });
+
+    // Removing random items breaks the invariant so we have to re-establish it.
+    if (itr != event_queue.end()) {
+        event_queue.erase(itr, event_queue.end());
+        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
    }
 }

-static void AddEventToQueue(Event* new_event) {
-    Event* prev_event = nullptr;
-    Event** next_event = &first;
-    for (;;) {
-        Event*& next = *next_event;
-        if (!next || new_event->time < next->time) {
-            new_event->next = next;
-            next = new_event;
-            break;
-        }
-        prev_event = next;
-        next_event = &prev_event->next;
+void RemoveEvent(const EventType* event_type) {
+    auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
+                              [&](const Event& e) { return e.type == event_type; });
+
+    // Removing random items breaks the invariant so we have to re-establish it.
+    if (itr != event_queue.end()) {
+        event_queue.erase(itr, event_queue.end());
+        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
    }
 }

-void ScheduleEvent(s64 cycles_into_future, int event_type, u64 userdata) {
-    Event* new_event = GetNewEvent();
-    new_event->userdata = userdata;
-    new_event->type = event_type;
-    new_event->time = GetTicks() + cycles_into_future;
-    AddEventToQueue(new_event);
-}
-
-s64 UnscheduleEvent(int event_type, u64 userdata) {
-    s64 result = 0;
-    if (!first)
-        return result;
-    while (first) {
-        if (first->type == event_type && first->userdata == userdata) {
-            result = first->time - GetTicks();
-
-            Event* next = first->next;
-            FreeEvent(first);
-            first = next;
-        } else {
-            break;
-        }
-    }
-    if (!first)
-        return result;
-
-    Event* prev_event = first;
-    Event* ptr = prev_event->next;
-
-    while (ptr) {
-        if (ptr->type == event_type && ptr->userdata == userdata) {
-            result = ptr->time - GetTicks();
-
-            prev_event->next = ptr->next;
-            FreeEvent(ptr);
-            ptr = prev_event->next;
-        } else {
-            prev_event = ptr;
-            ptr = ptr->next;
-        }
-    }
-
-    return result;
-}
-
-s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata) {
-    s64 result = 0;
-    std::lock_guard<std::recursive_mutex> lock(external_event_section);
-    if (!ts_first)
-        return result;
-
-    while (ts_first) {
-        if (ts_first->type == event_type && ts_first->userdata == userdata) {
-            result = ts_first->time - GetTicks();
-
-            Event* next = ts_first->next;
-            FreeTsEvent(ts_first);
-            ts_first = next;
-        } else {
-            break;
-        }
-    }
-
-    if (!ts_first) {
-        ts_last = nullptr;
-        return result;
-    }
-
-    Event* prev_event = ts_first;
-    Event* next = prev_event->next;
-    while (next) {
-        if (next->type == event_type && next->userdata == userdata) {
-            result = next->time - GetTicks();
-
-            prev_event->next = next->next;
-            if (next == ts_last)
-                ts_last = prev_event;
-            FreeTsEvent(next);
-            next = prev_event->next;
-        } else {
-            prev_event = next;
-            next = next->next;
-        }
-    }
-
-    return result;
-}
-
-// Warning: not included in save state.
-void RegisterAdvanceCallback(AdvanceCallback* callback) {
-    advance_callback = callback;
-}
-
-void RegisterMHzChangeCallback(MHzChangeCallback callback) {
-    mhz_change_callbacks.push_back(callback);
-}
-
-bool IsScheduled(int event_type) {
-    if (!first)
-        return false;
-    Event* event = first;
-    while (event) {
-        if (event->type == event_type)
-            return true;
-        event = event->next;
-    }
-    return false;
-}
-
-void RemoveEvent(int event_type) {
-    if (!first)
-        return;
-    while (first) {
-        if (first->type == event_type) {
-            Event* next = first->next;
-            FreeEvent(first);
-            first = next;
-        } else {
-            break;
-        }
-    }
-    if (!first)
-        return;
-    Event* prev = first;
-    Event* next = prev->next;
-    while (next) {
-        if (next->type == event_type) {
-            prev->next = next->next;
-            FreeEvent(next);
-            next = prev->next;
-        } else {
-            prev = next;
-            next = next->next;
-        }
-    }
-}
-
-void RemoveThreadsafeEvent(int event_type) {
-    std::lock_guard<std::recursive_mutex> lock(external_event_section);
-    if (!ts_first)
-        return;
-
-    while (ts_first) {
-        if (ts_first->type == event_type) {
-            Event* next = ts_first->next;
-            FreeTsEvent(ts_first);
-            ts_first = next;
-        } else {
-            break;
-        }
-    }
-
-    if (!ts_first) {
-        ts_last = nullptr;
-        return;
-    }
-
-    Event* prev = ts_first;
-    Event* next = prev->next;
-    while (next) {
-        if (next->type == event_type) {
-            prev->next = next->next;
-            if (next == ts_last)
-                ts_last = prev;
-            FreeTsEvent(next);
-            next = prev->next;
-        } else {
-            prev = next;
-            next = next->next;
-        }
-    }
-}
-
-void RemoveAllEvents(int event_type) {
-    RemoveThreadsafeEvent(event_type);
+void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
+    MoveEvents();
    RemoveEvent(event_type);
 }

-// This raise only the events required while the fifo is processing data
-void ProcessFifoWaitEvents() {
-    while (first) {
-        if (first->time <= (s64)GetTicks()) {
-            Event* evt = first;
-            first = first->next;
-            event_types[evt->type].callback(evt->userdata, (int)(GetTicks() - evt->time));
-            FreeEvent(evt);
-        } else {
-            break;
-        }
+void ForceExceptionCheck(s64 cycles) {
+    cycles = std::max<s64>(0, cycles);
+    if (downcount > cycles) {
+        // downcount is always (much) smaller than INT_MAX so we can safely cast cycles to an int
+        // here. Account for cycles already executed by adjusting slice_length.
+        slice_length -= downcount - static_cast<int>(cycles);
+        downcount = static_cast<int>(cycles);
    }
 }

 void MoveEvents() {
-    has_ts_events = false;
-
-    std::lock_guard<std::recursive_mutex> lock(external_event_section);
-    // Move events from async queue into main queue
-    while (ts_first) {
-        Event* next = ts_first->next;
-        AddEventToQueue(ts_first);
-        ts_first = next;
+    for (Event ev; ts_queue.Pop(ev);) {
+        ev.fifo_order = event_fifo_id++;
+        event_queue.emplace_back(std::move(ev));
+        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
    }
-    ts_last = nullptr;
-
-    // Move free events to threadsafe pool
-    while (allocated_ts_events > 0 && event_pool) {
-        Event* event = event_pool;
-        event_pool = event->next;
-        event->next = event_ts_pool;
-        event_ts_pool = event;
-        allocated_ts_events--;
-    }
-}
-
-void ForceCheck() {
-    s64 cycles_executed = g_slice_length - down_count;
-    global_timer += cycles_executed;
-    // This will cause us to check for new events immediately.
-    down_count = 0;
-    // But let's not eat a bunch more time in Advance() because of this.
-    g_slice_length = 0;
 }

 void Advance() {
-    s64 cycles_executed = g_slice_length - down_count;
+    MoveEvents();
+
+    int cycles_executed = slice_length - downcount;
    global_timer += cycles_executed;
-    down_count = g_slice_length;
+    slice_length = MAX_SLICE_LENGTH;

-    if (has_ts_events)
-        MoveEvents();
-    ProcessFifoWaitEvents();
+    is_global_timer_sane = true;

-    if (!first) {
-        if (g_slice_length < 10000) {
-            g_slice_length += 10000;
-            down_count += g_slice_length;
-        }
-    } else {
-        // Note that events can eat cycles as well.
-        int target = (int)(first->time - global_timer);
-        if (target > MAX_SLICE_LENGTH)
-            target = MAX_SLICE_LENGTH;
-
-        const int diff = target - g_slice_length;
-        g_slice_length += diff;
-        down_count += diff;
-    }
-    if (advance_callback)
-        advance_callback(static_cast<int>(cycles_executed));
-}
-
-void LogPendingEvents() {
-    Event* event = first;
-    while (event) {
-        // LOG_TRACE(Core_Timing, "PENDING: Now: %lld Pending: %lld Type: %d", globalTimer,
-        // next->time, next->type);
-        event = event->next;
-    }
-}
-
-void Idle(int max_idle) {
-    s64 cycles_down = down_count;
-    if (max_idle != 0 && cycles_down > max_idle)
-        cycles_down = max_idle;
-
-    if (first && cycles_down > 0) {
-        s64 cycles_executed = g_slice_length - down_count;
-        s64 cycles_next_event = first->time - global_timer;
-
-        if (cycles_next_event < cycles_executed + cycles_down) {
-            cycles_down = cycles_next_event - cycles_executed;
-            // Now, now... no time machines, please.
-            if (cycles_down < 0)
-                cycles_down = 0;
-        }
+    while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+        Event evt = std::move(event_queue.front());
+        std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
+        event_queue.pop_back();
+        evt.type->callback(evt.userdata, global_timer - evt.time);
    }

-    LOG_TRACE(Core_Timing, "Idle for %" PRId64 " cycles! (%f ms)", cycles_down,
-              cycles_down / (float)(g_clock_rate_arm11 * 0.001f));
+    is_global_timer_sane = false;

-    idled_cycles += cycles_down;
-    down_count -= cycles_down;
-    if (down_count == 0)
-        down_count = -1;
-}
-
-std::string GetScheduledEventsSummary() {
-    Event* event = first;
-    std::string text = "Scheduled events\n";
-    text.reserve(1000);
-    while (event) {
-        unsigned int t = event->type;
-        if (t >= event_types.size())
-            LOG_ERROR(Core_Timing, "Invalid event type"); // %i", t);
-        const char* name = event_types[event->type].name;
-        if (!name)
-            name = "[unknown]";
-        text += Common::StringFromFormat("%s : %i %08x%08x\n", name, (int)event->time,
-                                         (u32)(event->userdata >> 32), (u32)(event->userdata));
-        event = event->next;
+    // Still events left (scheduled in the future)
+    if (!event_queue.empty()) {
+        slice_length = static_cast<int>(
+            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH));
    }
-    return text;
+
+    downcount = slice_length;
 }

-} // namespace
+void Idle() {
+    idled_cycles += downcount;
+    downcount = 0;
+}
+
+// Returns the current tick count (GetTicks()) converted to microseconds, using
+// BASE_CLOCK_RATE as the number of ticks per second.
+u64 GetGlobalTimeUs() {
+    const u64 ticks = GetTicks();
+    // Convert whole seconds and the sub-second remainder separately. Multiplying the full tick
+    // count by 1000000 first wraps around u64 once ticks exceeds 2^64 / 10^6 (~1.8e13), while
+    // this form yields the same floor(ticks * 10^6 / BASE_CLOCK_RATE) without the intermediate
+    // overflow.
+    const u64 whole_seconds = ticks / BASE_CLOCK_RATE;
+    const u64 remainder_ticks = ticks % BASE_CLOCK_RATE;
+    return whole_seconds * 1000000 + remainder_ticks * 1000000 / BASE_CLOCK_RATE;
+}
+
+int GetDowncount() {
+    return downcount;
+}
+
+} // namespace CoreTiming
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@ -1,144 +1,191 @@
-// Copyright (c) 2012- PPSSPP Project / Dolphin Project.
-// Licensed under GPLv2 or any later version
+// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
+// Licensed under GPLv2+
 // Refer to the license.txt file included.

 #pragma once

+/**
+ * This is a system to schedule events into the emulated machine's future. Time is measured
+ * in main CPU clock cycles.
+ *
+ * To schedule an event, you first have to register its type. This is where you pass in the
+ * callback. You then schedule events using the type id you get back.
+ *
+ * The int cycles_late that the callbacks get is how many cycles late the event fired.
+ * So to schedule a new event on a regular basis:
+ * inside callback:
+ *   ScheduleEvent(period_in_cycles - cycles_late, event_type, userdata)
+ */
+
 #include <functional>
+#include <limits>
 #include <string>
 #include "common/common_types.h"
+#include "common/logging/log.h"

-// This is a system to schedule events into the emulated machine's future. Time is measured
-// in main CPU clock cycles.
-
-// To schedule an event, you first have to register its type. This is where you pass in the
-// callback. You then schedule events using the type id you get back.
-
-// See HW/SystemTimers.cpp for the main part of Dolphin's usage of this scheduler.
-
-// The int cycles_late that the callbacks get is how many cycles late it was.
-// So to schedule a new event on a regular basis:
-// inside callback:
-//   ScheduleEvent(periodInCycles - cycles_late, callback, "whatever")
-
-constexpr int BASE_CLOCK_RATE = 383778816; // Switch clock speed is 384MHz docked
-extern int g_clock_rate_arm11;
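+// NOTE(review): the measurement described below does not match the Switch value assigned to
+// BASE_CLOCK_RATE; it looks inherited from the Citra (3DS) code - confirm whether it applies.
+// The timing we get from the assembly is 268,111,855.956 Hz.
+// It is possible that this number isn't just an integer because the compiler could have
+// optimized the multiplication by a multiply-by-constant division.
+// Rounding to the nearest integer should be fine.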
+constexpr u64 BASE_CLOCK_RATE = 383778816; // Switch clock speed is 384MHz docked
+constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;

 inline s64 msToCycles(int ms) {
-    return (s64)g_clock_rate_arm11 / 1000 * ms;
+    // since ms is int there is no way to overflow
+    return BASE_CLOCK_RATE * static_cast<s64>(ms) / 1000;
 }

 inline s64 msToCycles(float ms) {
-    return (s64)(g_clock_rate_arm11 * ms * (0.001f));
+    return static_cast<s64>(BASE_CLOCK_RATE * (0.001f) * ms);
 }

 inline s64 msToCycles(double ms) {
-    return (s64)(g_clock_rate_arm11 * ms * (0.001));
+    return static_cast<s64>(BASE_CLOCK_RATE * (0.001) * ms);
 }

 inline s64 usToCycles(float us) {
-    return (s64)(g_clock_rate_arm11 * us * (0.000001f));
+    return static_cast<s64>(BASE_CLOCK_RATE * (0.000001f) * us);
 }

 inline s64 usToCycles(int us) {
-    return (g_clock_rate_arm11 / 1000000 * (s64)us);
+    return (BASE_CLOCK_RATE * static_cast<s64>(us) / 1000000);
 }

 inline s64 usToCycles(s64 us) {
-    return (g_clock_rate_arm11 / 1000000 * us);
+    if (us / 1000000 > MAX_VALUE_TO_MULTIPLY) {
+        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
+        return std::numeric_limits<s64>::max();
+    }
+    if (us > MAX_VALUE_TO_MULTIPLY) {
+        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
+        return BASE_CLOCK_RATE * (us / 1000000);
+    }
+    return (BASE_CLOCK_RATE * us) / 1000000;
 }

 inline s64 usToCycles(u64 us) {
-    return (s64)(g_clock_rate_arm11 / 1000000 * us);
+    if (us / 1000000 > MAX_VALUE_TO_MULTIPLY) {
+        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
+        return std::numeric_limits<s64>::max();
+    }
+    if (us > MAX_VALUE_TO_MULTIPLY) {
+        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
+        return BASE_CLOCK_RATE * static_cast<s64>(us / 1000000);
+    }
+    return (BASE_CLOCK_RATE * static_cast<s64>(us)) / 1000000;
+}
+
+inline s64 nsToCycles(float ns) {
+    return static_cast<s64>(BASE_CLOCK_RATE * (0.000000001f) * ns);
+}
+
+inline s64 nsToCycles(int ns) {
+    return BASE_CLOCK_RATE * static_cast<s64>(ns) / 1000000000;
+}
+
+inline s64 nsToCycles(s64 ns) {
+    if (ns / 1000000000 > MAX_VALUE_TO_MULTIPLY) {
+        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
+        return std::numeric_limits<s64>::max();
+    }
+    if (ns > MAX_VALUE_TO_MULTIPLY) {
+        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
+        return BASE_CLOCK_RATE * (ns / 1000000000);
+    }
+    return (BASE_CLOCK_RATE * ns) / 1000000000;
+}
+
+inline s64 nsToCycles(u64 ns) {
+    if (ns / 1000000000 > MAX_VALUE_TO_MULTIPLY) {
+        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
+        return std::numeric_limits<s64>::max();
+    }
+    if (ns > MAX_VALUE_TO_MULTIPLY) {
+        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
+        return BASE_CLOCK_RATE * (static_cast<s64>(ns) / 1000000000);
+    }
+    return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
+}
+
+inline u64 cyclesToNs(s64 cycles) {
+    return cycles * 1000000000 / BASE_CLOCK_RATE;
 }

 inline s64 cyclesToUs(s64 cycles) {
-    return cycles / (g_clock_rate_arm11 / 1000000);
+    return cycles * 1000000 / BASE_CLOCK_RATE;
 }

 inline u64 cyclesToMs(s64 cycles) {
-    return cycles / (g_clock_rate_arm11 / 1000);
+    return cycles * 1000 / BASE_CLOCK_RATE;
 }

 namespace CoreTiming {
+
+/**
+ * CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
+ * required to end slice -1 and start slice 0 before the first cycle of code is executed.
+ */
 void Init();
 void Shutdown();

-typedef void (*MHzChangeCallback)();
 typedef std::function<void(u64 userdata, int cycles_late)> TimedCallback;

 /**
-* Advance the CPU core by the specified number of ticks (e.g. to simulate CPU execution time)
-* @param ticks Number of ticks to advance the CPU core
-*/
-void AddTicks(u64 ticks);
-
+ * This should only be called from the emu thread; if you are calling it from any other thread,
+ * you are doing something evil.
+ */
 u64 GetTicks();
 u64 GetIdleTicks();
-u64 GetGlobalTimeUs();
+void AddTicks(u64 ticks);
+
+struct EventType;

 /**
- * Registers an event type with the specified name and callback
- * @param name Name of the event type
- * @param callback Function that will execute when this event fires
- * @returns An identifier for the event type that was registered
+ * Returns the event_type identifier. If name is not unique, it will assert.
 */
-int RegisterEvent(const char* name, TimedCallback callback);
-/// For save states.
-void RestoreRegisterEvent(int event_type, const char* name, TimedCallback callback);
+EventType* RegisterEvent(const std::string& name, TimedCallback callback);
 void UnregisterAllEvents();

-/// userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from disk,
-/// when we implement state saves.
 /**
- * Schedules an event to run after the specified number of cycles,
- * with an optional parameter to be passed to the callback handler.
- * This must be run ONLY from within the cpu thread.
- * @param cycles_into_future The number of cycles after which this event will be fired
- * @param event_type The event type to fire, as returned from RegisterEvent
- * @param userdata Optional parameter to pass to the callback when fired
+ * After the first Advance, the slice lengths and the downcount will be reduced whenever an event
+ * is scheduled earlier than the current values.
+ * Scheduling from a callback will not update the downcount until the Advance() completes.
 */
-void ScheduleEvent(s64 cycles_into_future, int event_type, u64 userdata = 0);
-
-void ScheduleEvent_Threadsafe(s64 cycles_into_future, int event_type, u64 userdata = 0);
-void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0);
+void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);

 /**
- * Unschedules an event with the specified type and userdata
- * @param event_type The type of event to unschedule, as returned from RegisterEvent
- * @param userdata The userdata that identifies this event, as passed to ScheduleEvent
- * @returns The remaining ticks until the next invocation of the event callback
+ * This is to be called when a thread outside of the HLE threads, such as the graphics thread,
+ * wants to schedule things to be executed on the main thread.
+ * Note that this doesn't change slice_length and thus events scheduled by this might be called
+ * with a delay of up to MAX_SLICE_LENGTH.
 */
-s64 UnscheduleEvent(int event_type, u64 userdata);
+void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);

-s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata);
+void UnscheduleEvent(const EventType* event_type, u64 userdata);

-void RemoveEvent(int event_type);
-void RemoveThreadsafeEvent(int event_type);
-void RemoveAllEvents(int event_type);
-bool IsScheduled(int event_type);
-/// Runs any pending events and updates downcount for the next slice of cycles
+/// We only permit one event of each type in the queue at a time.
+void RemoveEvent(const EventType* event_type);
+void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
+
+/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
+ * the previous timing slice and begins the next one; you must Advance from the previous
+ * slice to the current one before executing any cycles. CoreTiming starts in slice -1, so an
+ * Advance() is required to initialize the slice length before the first cycle of emulated
+ * instructions is executed.
+ */
 void Advance();
 void MoveEvents();
-void ProcessFifoWaitEvents();
-void ForceCheck();

 /// Pretend that the main CPU has executed enough cycles to reach the next event.
-void Idle(int maxIdle = 0);
+void Idle();

-/// Clear all pending events. This should ONLY be done on exit or state load.
+/// Clear all pending events. This should ONLY be done on exit.
 void ClearPendingEvents();

-void LogPendingEvents();
+void ForceExceptionCheck(s64 cycles);

-/// Warning: not included in save states.
-void RegisterAdvanceCallback(void (*callback)(int cycles_executed));
-void RegisterMHzChangeCallback(MHzChangeCallback callback);
+u64 GetGlobalTimeUs();

-std::string GetScheduledEventsSummary();
+int GetDowncount();

-void SetClockFrequencyMHz(int cpu_mhz);
-int GetClockFrequencyMHz();
-extern int g_slice_length;
-
-} // namespace
+} // namespace CoreTiming
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@ -26,7 +26,7 @@
 namespace Kernel {

 /// Event type for the thread wake up event
-static int ThreadWakeupEventType;
+static CoreTiming::EventType* ThreadWakeupEventType = nullptr;

 bool Thread::ShouldWait(Thread* thread) const {
    return status != THREADSTATUS_DEAD;
@ -265,8 +265,7 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
    if (nanoseconds == -1)
        return;

-    u64 microseconds = nanoseconds / 1000;
-    CoreTiming::ScheduleEvent(usToCycles(microseconds), ThreadWakeupEventType, callback_handle);
+    CoreTiming::ScheduleEvent(nsToCycles(nanoseconds), ThreadWakeupEventType, callback_handle);
 }

 void Thread::ResumeFromWait() {
--- a/src/core/hle/kernel/timer.cpp
+++ b/src/core/hle/kernel/timer.cpp
@ -14,7 +14,7 @@
 namespace Kernel {

 /// The event type of the generic timer callback event
-static int timer_callback_event_type;
+static CoreTiming::EventType* timer_callback_event_type = nullptr;
 // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future, allowing
 //               us to simply use a pool index or similar.
 static Kernel::HandleTable timer_callback_handle_table;
@ -57,9 +57,7 @@ void Timer::Set(s64 initial, s64 interval) {
        // Immediately invoke the callback
        Signal(0);
    } else {
-        u64 initial_microseconds = initial / 1000;
-        CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type,
-                                  callback_handle);
+        CoreTiming::ScheduleEvent(nsToCycles(initial), timer_callback_event_type, callback_handle);
    }
 }

@ -88,8 +86,7 @@ void Timer::Signal(int cycles_late) {

    if (interval_delay != 0) {
        // Reschedule the timer with the interval delay
-        u64 interval_microseconds = interval_delay / 1000;
-        CoreTiming::ScheduleEvent(usToCycles(interval_microseconds) - cycles_late,
+        CoreTiming::ScheduleEvent(nsToCycles(interval_delay) - cycles_late,
                                  timer_callback_event_type, callback_handle);
    }
 }
--- a/src/core/hle/shared_page.cpp
+++ b/src/core/hle/shared_page.cpp
@ -14,7 +14,7 @@ namespace SharedPage {

 SharedPageDef shared_page;

-static int update_time_event;
+static CoreTiming::EventType* update_time_event;

 /// Gets system time in 3DS format. The epoch is Jan 1900, and the unit is millisecond.
 static u64 GetSystemTime() {
@ -56,7 +56,7 @@ static void UpdateTimeCallback(u64 userdata, int cycles_late) {

    date_time.date_time = GetSystemTime();
    date_time.update_tick = CoreTiming::GetTicks();
-    date_time.tick_to_second_coefficient = g_clock_rate_arm11;
+    date_time.tick_to_second_coefficient = BASE_CLOCK_RATE;
    date_time.tick_offset = 0;

    ++shared_page.date_time_counter;
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@ -31,7 +31,7 @@ Regs g_regs;
 /// 268MHz CPU clocks / 60Hz frames per second
 const u64 frame_ticks = static_cast<u64>(BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 /// Event id for CoreTiming
-static int vblank_event;
+static CoreTiming::EventType* vblank_event;

 template <typename T>
 inline void Read(T& var, const u32 raw_addr) {
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@ -1,6 +1,7 @@
 set(SRCS
            common/param_package.cpp
            core/arm/arm_test_common.cpp
+            core/core_timing.cpp
            core/file_sys/path_parser.cpp
            core/memory/memory.cpp
            glad.cpp
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@ -0,0 +1,237 @@
+// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <catch.hpp>
+
+#include <array>
+#include <bitset>
+#include <string>
+#include "common/file_util.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+
+// Numbers are chosen randomly to make sure the correct one is given.
+static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals
+
+static std::bitset<CB_IDS.size()> callbacks_ran_flags;
+static u64 expected_callback = 0;
+static s64 lateness = 0;
+
+template <unsigned int IDX>
+void CallbackTemplate(u64 userdata, s64 cycles_late) {
+    static_assert(IDX < CB_IDS.size(), "IDX out of range");
+    callbacks_ran_flags.set(IDX);
+    REQUIRE(CB_IDS[IDX] == userdata);
+    REQUIRE(CB_IDS[IDX] == expected_callback);
+    REQUIRE(lateness == cycles_late);
+}
+
+class ScopeInit final {
+public:
+    ScopeInit() {
+        CoreTiming::Init();
+    }
+    ~ScopeInit() {
+        CoreTiming::Shutdown();
+    }
+};
+
+static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
+                            int cpu_downcount = 0) {
+    callbacks_ran_flags = 0;
+    expected_callback = CB_IDS[idx];
+    lateness = expected_lateness;
+
+    CoreTiming::AddTicks(CoreTiming::GetDowncount() -
+                         cpu_downcount); // Pretend we executed X cycles of instructions.
+    CoreTiming::Advance();
+
+    REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
+    REQUIRE(downcount == CoreTiming::GetDowncount());
+}
+
+TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
+    ScopeInit guard;
+
+    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
+    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
+    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
+    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
+    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+
+    // Enter slice 0
+    CoreTiming::Advance();
+
+    // D -> B -> C -> A -> E
+    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
+    REQUIRE(1000 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]);
+    REQUIRE(500 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]);
+    REQUIRE(500 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]);
+    REQUIRE(100 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]);
+    REQUIRE(100 == CoreTiming::GetDowncount());
+
+    AdvanceAndCheck(3, 400);
+    AdvanceAndCheck(1, 300);
+    AdvanceAndCheck(2, 200);
+    AdvanceAndCheck(0, 200);
+    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
+}
+
+TEST_CASE("CoreTiming[Threadsave]", "[core]") {
+    ScopeInit guard;
+
+    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
+    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
+    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
+    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
+    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
+
+    // Enter slice 0
+    CoreTiming::Advance();
+
+    // D -> B -> C -> A -> E
+    CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
+    // Manually force since ScheduleEventThreadsafe doesn't call it
+    CoreTiming::ForceExceptionCheck(1000);
+    REQUIRE(1000 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
+    // Manually force since ScheduleEventThreadsafe doesn't call it
+    CoreTiming::ForceExceptionCheck(500);
+    REQUIRE(500 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
+    // Manually force since ScheduleEventThreadsafe doesn't call it
+    CoreTiming::ForceExceptionCheck(800);
+    REQUIRE(500 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
+    // Manually force since ScheduleEventThreadsafe doesn't call it
+    CoreTiming::ForceExceptionCheck(100);
+    REQUIRE(100 == CoreTiming::GetDowncount());
+    CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
+    // Manually force since ScheduleEventThreadsafe doesn't call it
+    CoreTiming::ForceExceptionCheck(1200);
+    REQUIRE(100 == CoreTiming::GetDowncount());
+
+    AdvanceAndCheck(3, 400);
+    AdvanceAndCheck(1, 300);
+    AdvanceAndCheck(2, 200);
+    AdvanceAndCheck(0, 200);
+    AdvanceAndCheck(4, MAX_SLICE_LENGTH);
+}
+
+namespace SharedSlotTest {
+static unsigned int counter = 0;
+
+template <unsigned int ID>
+void FifoCallback(u64 userdata, s64 cycles_late) {
+    static_assert(ID < CB_IDS.size(), "ID out of range");
+    callbacks_ran_flags.set(ID);
+    REQUIRE(CB_IDS[ID] == userdata);
+    REQUIRE(ID == counter);
+    REQUIRE(lateness == cycles_late);
+    ++counter;
+}
+} // namespace SharedSlotTest
+
+TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
+    using namespace SharedSlotTest;
+
+    ScopeInit guard;
+
+    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
+    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
+    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
+    CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
+    CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>);
+
+    CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
+    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
+    CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]);
+    CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]);
+    CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]);
+
+    // Enter slice 0
+    CoreTiming::Advance();
+    REQUIRE(1000 == CoreTiming::GetDowncount());
+
+    callbacks_ran_flags = 0;
+    counter = 0;
+    lateness = 0;
+    CoreTiming::AddTicks(CoreTiming::GetDowncount());
+    CoreTiming::Advance();
+    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
+    REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
+}
+
+TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
+    ScopeInit guard;
+
+    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
+    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
+
+    // Enter slice 0
+    CoreTiming::Advance();
+
+    CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]);
+    CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]);
+
+    AdvanceAndCheck(0, 90, 10, -10); // (100 - 10)
+    AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
+}
+
+namespace ChainSchedulingTest {
+static int reschedules = 0;
+
+static void RescheduleCallback(u64 userdata, s64 cycles_late) {
+    --reschedules;
+    REQUIRE(reschedules >= 0);
+    REQUIRE(lateness == cycles_late);
+
+    if (reschedules > 0)
+        CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata),
+                                  userdata);
+}
+} // namespace ChainSchedulingTest
+
+TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
+    using namespace ChainSchedulingTest;
+
+    ScopeInit guard;
+
+    CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
+    CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
+    CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
+    CoreTiming::EventType* cb_rs =
+        CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback);
+
+    // Enter slice 0
+    CoreTiming::Advance();
+
+    CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]);
+    CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
+    CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]);
+    CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
+    REQUIRE(800 == CoreTiming::GetDowncount());
+
+    reschedules = 3;
+    AdvanceAndCheck(0, 200);  // cb_a
+    AdvanceAndCheck(1, 1000); // cb_b, cb_rs
+    REQUIRE(2 == reschedules);
+
+    CoreTiming::AddTicks(CoreTiming::GetDowncount());
+    CoreTiming::Advance(); // cb_rs
+    REQUIRE(1 == reschedules);
+    REQUIRE(200 == CoreTiming::GetDowncount());
+
+    AdvanceAndCheck(2, 800); // cb_c
+
+    CoreTiming::AddTicks(CoreTiming::GetDowncount());
+    CoreTiming::Advance(); // cb_rs
+    REQUIRE(0 == reschedules);
+    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
+}