CoreTiming: Reworked CoreTiming (cherry-picked from Citra #3119)

* CoreTiming: New CoreTiming; Add Test for CoreTiming
master
B3n30 2017-11-25 14:56:57 +07:00 committed by bunnei
parent c12c756539
commit 82151d407d
12 changed files with 665 additions and 584 deletions

@ -18,7 +18,7 @@
namespace AudioCore {
// Audio ticks occur about every 5 milliseconds.
static int tick_event; ///< CoreTiming event
static CoreTiming::EventType* tick_event; ///< CoreTiming event
static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {

@ -76,6 +76,7 @@ set(HEADERS
telemetry.h
thread.h
thread_queue_list.h
threadsafe_queue.h
timer.h
vector_math.h
)

@ -0,0 +1,122 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
// a simple lockless thread-safe,
// single reader, single writer queue
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <utility>
#include "common/common_types.h"
namespace Common {
/**
 * A simple lockless FIFO queue for a single reader and a single writer.
 *
 * The queue is a singly linked chain of nodes. The node at write_ptr is always an
 * unfilled placeholder, so the queue is empty exactly when read_ptr's node has no
 * successor.
 *
 * @tparam T        Element type; must be default constructible and assignable.
 * @tparam NeedSize When true an element counter is maintained and Size() may be used.
 */
template <typename T, bool NeedSize = true>
class SPSCQueue {
public:
    SPSCQueue() : size(0) {
        write_ptr = read_ptr = new ElementPtr();
    }

    ~SPSCQueue() {
        // this will empty out the whole queue
        delete read_ptr;
    }

    /// Returns the number of queued elements. Only available when NeedSize is true.
    u32 Size() const {
        static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
        return size.load();
    }

    /// Returns true when there is no element to read.
    bool Empty() const {
        return !read_ptr->next.load();
    }

    /// Returns the oldest element. Only meaningful while the queue is not empty.
    T& Front() const {
        return read_ptr->current;
    }

    /// Appends an element; the argument is perfectly forwarded into the node.
    template <typename Arg>
    void Push(Arg&& t) {
        // create the element, add it to the queue
        write_ptr->current = std::forward<Arg>(t);
        // set the next pointer to a new element ptr
        // then advance the write pointer
        ElementPtr* new_ptr = new ElementPtr();
        write_ptr->next.store(new_ptr, std::memory_order_release);
        write_ptr = new_ptr;
        if (NeedSize)
            size++;
    }

    /// Discards the oldest element. Does nothing when the queue is empty.
    void Pop() {
        // Popping the placeholder node would leave read_ptr null, write_ptr dangling and
        // the size counter wrapped around, so an empty queue is left untouched.
        if (Empty())
            return;
        if (NeedSize)
            size--;
        ElementPtr* tmpptr = read_ptr;
        // advance the read pointer
        read_ptr = tmpptr->next.load();
        // detach the node so that deleting it does not take the rest of the chain with it
        tmpptr->next.store(nullptr);
        delete tmpptr; // this also deletes the element
    }

    /**
     * Moves the oldest element into t and removes it from the queue.
     * @returns false (leaving t untouched) when the queue is empty, true otherwise.
     */
    bool Pop(T& t) {
        if (Empty())
            return false;
        if (NeedSize)
            size--;
        ElementPtr* tmpptr = read_ptr;
        read_ptr = tmpptr->next.load(std::memory_order_acquire);
        t = std::move(tmpptr->current);
        tmpptr->next.store(nullptr);
        delete tmpptr;
        return true;
    }

    // not thread-safe
    void Clear() {
        size.store(0);
        delete read_ptr;
        write_ptr = read_ptr = new ElementPtr();
    }

private:
    // stores a pointer to element
    // and a pointer to the next ElementPtr
    class ElementPtr {
    public:
        ElementPtr() : next(nullptr) {}

        ~ElementPtr() {
            // Destroy the rest of the chain iteratively. Every successor is detached
            // before it is deleted, so its own destructor finds nothing left to free and
            // the call depth stays constant no matter how many elements are queued.
            ElementPtr* node = next.exchange(nullptr);
            while (node) {
                ElementPtr* following = node->next.exchange(nullptr);
                delete node;
                node = following;
            }
        }

        T current;
        std::atomic<ElementPtr*> next;
    };

    ElementPtr* write_ptr;
    ElementPtr* read_ptr;
    std::atomic<u32> size;
};
// a simple thread-safe,
// single reader, multiple writer queue
/**
 * A simple thread-safe queue for a single reader and multiple writers.
 *
 * Writers are serialized with a mutex; everything else is inherited from SPSCQueue.
 */
template <typename T, bool NeedSize = true>
class MPSCQueue : public SPSCQueue<T, NeedSize> {
public:
    /// Appends an element while holding the writer lock.
    template <typename Arg>
    void Push(Arg&& t) {
        std::lock_guard<std::mutex> lock(write_lock);
        // Forward instead of passing the named parameter, which is always an lvalue and
        // would make the base class copy elements that the caller handed over as rvalues.
        SPSCQueue<T, NeedSize>::Push(std::forward<Arg>(t));
    }

private:
    std::mutex write_lock;
};
} // namespace Common

@ -54,6 +54,7 @@ System::ResultStatus System::RunLoop(int tight_loop) {
CoreTiming::Advance();
PrepareReschedule();
} else {
CoreTiming::Advance();
cpu_core->Run(tight_loop);
}

@ -1,562 +1,238 @@
// Copyright (c) 2012- PPSSPP Project / Dolphin Project.
// Licensed under GPLv2 or any later version
// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <atomic>
#include <cinttypes>
#include <mutex>
#include <vector>
#include "common/chunk_file.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_timing.h"
int g_clock_rate_arm11 = BASE_CLOCK_RATE;
// is this really necessary?
#define INITIAL_SLICE_LENGTH 20000
#define MAX_SLICE_LENGTH 100000000
#include <algorithm>
#include <cinttypes>
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/thread.h"
#include "common/threadsafe_queue.h"
namespace CoreTiming {
struct EventType {
EventType() {}
EventType(TimedCallback cb, const char* n) : callback(cb), name(n) {}
TimedCallback callback;
const char* name;
};
static std::vector<EventType> event_types;
struct BaseEvent {
s64 time;
u64 userdata;
int type;
};
typedef LinkedListItem<BaseEvent> Event;
static Event* first;
static Event* ts_first;
static Event* ts_last;
// event pools
static Event* event_pool = nullptr;
static Event* event_ts_pool = nullptr;
static int allocated_ts_events = 0;
// Optimization to skip MoveEvents when possible.
static std::atomic<bool> has_ts_events(false);
int g_slice_length;
static s64 global_timer;
static int slice_length;
static int downcount;
struct EventType {
TimedCallback callback;
const std::string* name;
};
// One scheduled occurrence of an EventType, as stored in event_queue and ts_queue.
// The member order must not change: events are created with positional brace
// initialization (Event{time, fifo_order, userdata, type}).
struct Event {
s64 time; // tick (same time base as global_timer) at which the event becomes due
u64 fifo_order; // insertion counter, used to break ties between events with equal time
u64 userdata; // opaque value handed back to the callback when the event fires
const EventType* type; // event type whose callback is invoked
};
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
/// Returns true when left is due after right; events with equal times are ordered by
/// their insertion counter.
static bool operator>(const Event& left, const Event& right) {
    if (left.time != right.time) {
        return left.time > right.time;
    }
    return left.fifo_order > right.fifo_order;
}
/// Returns true when left is due before right; events with equal times are ordered by
/// their insertion counter.
static bool operator<(const Event& left, const Event& right) {
    if (left.time != right.time) {
        return left.time < right.time;
    }
    return left.fifo_order < right.fifo_order;
}
// unordered_map stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
static std::unordered_map<std::string, EventType> event_types;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accommodated
// by the standard adaptor class.
static std::vector<Event> event_queue;
static u64 event_fifo_id;
// the queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
static Common::MPSCQueue<Event, false> ts_queue;
static constexpr int MAX_SLICE_LENGTH = 20000;
static s64 idled_cycles;
static s64 last_global_time_ticks;
static s64 last_global_time_us;
static s64 down_count = 0; ///< A decreasing counter of remaining cycles before the next event,
/// decreased by the cpu run loop
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
static bool is_global_timer_sane;
static std::recursive_mutex external_event_section;
static EventType* ev_lost = nullptr;
// Warning: not included in save state.
using AdvanceCallback = void(int cycles_executed);
static AdvanceCallback* advance_callback = nullptr;
static std::vector<MHzChangeCallback> mhz_change_callbacks;
static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
static void FireMhzChange() {
for (auto callback : mhz_change_callbacks)
callback();
}
EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
// check for existing type with same name.
// we want event type names to remain unique so that we can use them for serialization.
ASSERT_MSG(event_types.find(name) == event_types.end(),
"CoreTiming Event \"%s\" is already registered. Events should only be registered "
"during Init to avoid breaking save states.",
name.c_str());
void SetClockFrequencyMHz(int cpu_mhz) {
// When the mhz changes, we keep track of what "time" it was before hand.
// This way, time always moves forward, even if mhz is changed.
last_global_time_us = GetGlobalTimeUs();
last_global_time_ticks = GetTicks();
g_clock_rate_arm11 = cpu_mhz * 1000000;
// TODO: Rescale times of scheduled events?
FireMhzChange();
}
int GetClockFrequencyMHz() {
return g_clock_rate_arm11 / 1000000;
}
u64 GetGlobalTimeUs() {
s64 ticks_since_last = GetTicks() - last_global_time_ticks;
int freq = GetClockFrequencyMHz();
s64 us_since_last = ticks_since_last / freq;
return last_global_time_us + us_since_last;
}
static Event* GetNewEvent() {
if (!event_pool)
return new Event;
Event* event = event_pool;
event_pool = event->next;
return event;
}
static Event* GetNewTsEvent() {
allocated_ts_events++;
if (!event_ts_pool)
return new Event;
Event* event = event_ts_pool;
event_ts_pool = event->next;
return event;
}
static void FreeEvent(Event* event) {
event->next = event_pool;
event_pool = event;
}
static void FreeTsEvent(Event* event) {
event->next = event_ts_pool;
event_ts_pool = event;
allocated_ts_events--;
}
int RegisterEvent(const char* name, TimedCallback callback) {
event_types.emplace_back(callback, name);
return (int)event_types.size() - 1;
}
static void AntiCrashCallback(u64 userdata, int cycles_late) {
LOG_CRITICAL(Core_Timing, "Savestate broken: an unregistered event was called.");
}
void RestoreRegisterEvent(int event_type, const char* name, TimedCallback callback) {
if (event_type >= (int)event_types.size())
event_types.resize(event_type + 1, EventType(AntiCrashCallback, "INVALID EVENT"));
event_types[event_type] = EventType(callback, name);
auto info = event_types.emplace(name, EventType{callback, nullptr});
EventType* event_type = &info.first->second;
event_type->name = &info.first->first;
return event_type;
}
void UnregisterAllEvents() {
if (first)
LOG_ERROR(Core_Timing, "Cannot unregister events with events pending");
ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
event_types.clear();
}
void Init() {
down_count = INITIAL_SLICE_LENGTH;
g_slice_length = INITIAL_SLICE_LENGTH;
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
last_global_time_ticks = 0;
last_global_time_us = 0;
has_ts_events = 0;
mhz_change_callbacks.clear();
first = nullptr;
ts_first = nullptr;
ts_last = nullptr;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_pool = nullptr;
event_ts_pool = nullptr;
allocated_ts_events = 0;
advance_callback = nullptr;
event_fifo_id = 0;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
}
void Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
while (event_pool) {
Event* event = event_pool;
event_pool = event->next;
delete event;
}
std::lock_guard<std::recursive_mutex> lock(external_event_section);
while (event_ts_pool) {
Event* event = event_ts_pool;
event_ts_pool = event->next;
delete event;
// This should only be called from the CPU thread. If you are calling
// it from any other thread, you are doing something evil
u64 GetTicks() {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
void AddTicks(u64 ticks) {
down_count -= ticks;
if (down_count < 0) {
Advance();
}
}
u64 GetTicks() {
return (u64)global_timer + g_slice_length - down_count;
downcount -= ticks;
}
u64 GetIdleTicks() {
return (u64)idled_cycles;
}
// This is to be called when outside threads, such as the graphics thread, wants to
// schedule things to be executed on the main thread.
void ScheduleEvent_Threadsafe(s64 cycles_into_future, int event_type, u64 userdata) {
std::lock_guard<std::recursive_mutex> lock(external_event_section);
Event* new_event = GetNewTsEvent();
new_event->time = GetTicks() + cycles_into_future;
new_event->type = event_type;
new_event->next = nullptr;
new_event->userdata = userdata;
if (!ts_first)
ts_first = new_event;
if (ts_last)
ts_last->next = new_event;
ts_last = new_event;
has_ts_events = true;
}
// Same as ScheduleEvent_Threadsafe(0, ...) EXCEPT if we are already on the CPU thread
// in which case the event will get handled immediately, before returning.
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata) {
if (false) // Core::IsCPUThread())
{
std::lock_guard<std::recursive_mutex> lock(external_event_section);
event_types[event_type].callback(userdata, 0);
} else
ScheduleEvent_Threadsafe(0, event_type, userdata);
return static_cast<u64>(idled_cycles);
}
void ClearPendingEvents() {
while (first) {
Event* event = first->next;
FreeEvent(first);
first = event;
event_queue.clear();
}
/// Queues an event of the given type to fire cycles_into_future ticks from now.
/// The event type must not be null.
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
    ASSERT(event_type != nullptr);
    const s64 fire_time = GetTicks() + cycles_into_future;

    // While a slice is running the event may be due before the next Advance(); shorten
    // the slice so that it is picked up in time.
    if (!is_global_timer_sane) {
        ForceExceptionCheck(cycles_into_future);
    }

    event_queue.push_back(Event{fire_time, event_fifo_id++, userdata, event_type});
    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
}
/// Hands an event over to ts_queue. The fire time is based on global_timer, and the fifo
/// order is left at 0 because MoveEvents() assigns it when the event enters event_queue.
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
    const s64 fire_time = global_timer + cycles_into_future;
    ts_queue.Push(Event{fire_time, 0, userdata, event_type});
}
/// Drops every queued event that matches both the given type and the given userdata.
void UnscheduleEvent(const EventType* event_type, u64 userdata) {
    const auto is_target = [event_type, userdata](const Event& pending) {
        return pending.type == event_type && pending.userdata == userdata;
    };

    const auto removed_begin = std::remove_if(event_queue.begin(), event_queue.end(), is_target);
    if (removed_begin == event_queue.end()) {
        // Nothing matched, the heap is untouched.
        return;
    }

    // Taking elements out of the middle breaks the heap invariant, so rebuild it.
    event_queue.erase(removed_begin, event_queue.end());
    std::make_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
}
static void AddEventToQueue(Event* new_event) {
Event* prev_event = nullptr;
Event** next_event = &first;
for (;;) {
Event*& next = *next_event;
if (!next || new_event->time < next->time) {
new_event->next = next;
next = new_event;
break;
}
prev_event = next;
next_event = &prev_event->next;
/// Drops every queued event of the given type, whatever its userdata.
void RemoveEvent(const EventType* event_type) {
    const auto has_type = [event_type](const Event& pending) {
        return pending.type == event_type;
    };

    const auto removed_begin = std::remove_if(event_queue.begin(), event_queue.end(), has_type);
    if (removed_begin == event_queue.end()) {
        // Nothing matched, the heap is untouched.
        return;
    }

    // Taking elements out of the middle breaks the heap invariant, so rebuild it.
    event_queue.erase(removed_begin, event_queue.end());
    std::make_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
}
void ScheduleEvent(s64 cycles_into_future, int event_type, u64 userdata) {
Event* new_event = GetNewEvent();
new_event->userdata = userdata;
new_event->type = event_type;
new_event->time = GetTicks() + cycles_into_future;
AddEventToQueue(new_event);
}
s64 UnscheduleEvent(int event_type, u64 userdata) {
s64 result = 0;
if (!first)
return result;
while (first) {
if (first->type == event_type && first->userdata == userdata) {
result = first->time - GetTicks();
Event* next = first->next;
FreeEvent(first);
first = next;
} else {
break;
}
}
if (!first)
return result;
Event* prev_event = first;
Event* ptr = prev_event->next;
while (ptr) {
if (ptr->type == event_type && ptr->userdata == userdata) {
result = ptr->time - GetTicks();
prev_event->next = ptr->next;
FreeEvent(ptr);
ptr = prev_event->next;
} else {
prev_event = ptr;
ptr = ptr->next;
}
}
return result;
}
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata) {
s64 result = 0;
std::lock_guard<std::recursive_mutex> lock(external_event_section);
if (!ts_first)
return result;
while (ts_first) {
if (ts_first->type == event_type && ts_first->userdata == userdata) {
result = ts_first->time - GetTicks();
Event* next = ts_first->next;
FreeTsEvent(ts_first);
ts_first = next;
} else {
break;
}
}
if (!ts_first) {
ts_last = nullptr;
return result;
}
Event* prev_event = ts_first;
Event* next = prev_event->next;
while (next) {
if (next->type == event_type && next->userdata == userdata) {
result = next->time - GetTicks();
prev_event->next = next->next;
if (next == ts_last)
ts_last = prev_event;
FreeTsEvent(next);
next = prev_event->next;
} else {
prev_event = next;
next = next->next;
}
}
return result;
}
// Warning: not included in save state.
void RegisterAdvanceCallback(AdvanceCallback* callback) {
advance_callback = callback;
}
void RegisterMHzChangeCallback(MHzChangeCallback callback) {
mhz_change_callbacks.push_back(callback);
}
bool IsScheduled(int event_type) {
if (!first)
return false;
Event* event = first;
while (event) {
if (event->type == event_type)
return true;
event = event->next;
}
return false;
}
void RemoveEvent(int event_type) {
if (!first)
return;
while (first) {
if (first->type == event_type) {
Event* next = first->next;
FreeEvent(first);
first = next;
} else {
break;
}
}
if (!first)
return;
Event* prev = first;
Event* next = prev->next;
while (next) {
if (next->type == event_type) {
prev->next = next->next;
FreeEvent(next);
next = prev->next;
} else {
prev = next;
next = next->next;
}
}
}
void RemoveThreadsafeEvent(int event_type) {
std::lock_guard<std::recursive_mutex> lock(external_event_section);
if (!ts_first)
return;
while (ts_first) {
if (ts_first->type == event_type) {
Event* next = ts_first->next;
FreeTsEvent(ts_first);
ts_first = next;
} else {
break;
}
}
if (!ts_first) {
ts_last = nullptr;
return;
}
Event* prev = ts_first;
Event* next = prev->next;
while (next) {
if (next->type == event_type) {
prev->next = next->next;
if (next == ts_last)
ts_last = prev;
FreeTsEvent(next);
next = prev->next;
} else {
prev = next;
next = next->next;
}
}
}
void RemoveAllEvents(int event_type) {
RemoveThreadsafeEvent(event_type);
// Removes all pending events of the given type, including those still waiting in the
// thread-safe queue: MoveEvents() first transfers them into event_queue, then
// RemoveEvent() erases every event of that type from event_queue.
void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
MoveEvents();
RemoveEvent(event_type);
}
// This raise only the events required while the fifo is processing data
void ProcessFifoWaitEvents() {
while (first) {
if (first->time <= (s64)GetTicks()) {
Event* evt = first;
first = first->next;
event_types[evt->type].callback(evt->userdata, (int)(GetTicks() - evt->time));
FreeEvent(evt);
} else {
break;
}
/// Shortens the running slice so that at most `cycles` ticks remain before downcount
/// runs out. Negative values are treated as 0; a slice that is already short enough is
/// left alone.
void ForceExceptionCheck(s64 cycles) {
    const s64 remaining = std::max<s64>(0, cycles);
    if (downcount <= remaining) {
        return;
    }

    // remaining is smaller than downcount here, so it fits into an int. slice_length is
    // reduced by the same amount as downcount to keep the count of already executed
    // cycles (slice_length - downcount) unchanged.
    const int new_downcount = static_cast<int>(remaining);
    slice_length -= downcount - new_downcount;
    downcount = new_downcount;
}
void MoveEvents() {
has_ts_events = false;
std::lock_guard<std::recursive_mutex> lock(external_event_section);
// Move events from async queue into main queue
while (ts_first) {
Event* next = ts_first->next;
AddEventToQueue(ts_first);
ts_first = next;
for (Event ev; ts_queue.Pop(ev);) {
ev.fifo_order = event_fifo_id++;
event_queue.emplace_back(std::move(ev));
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
}
ts_last = nullptr;
// Move free events to threadsafe pool
while (allocated_ts_events > 0 && event_pool) {
Event* event = event_pool;
event_pool = event->next;
event->next = event_ts_pool;
event_ts_pool = event;
allocated_ts_events--;
}
}
void ForceCheck() {
s64 cycles_executed = g_slice_length - down_count;
global_timer += cycles_executed;
// This will cause us to check for new events immediately.
down_count = 0;
// But let's not eat a bunch more time in Advance() because of this.
g_slice_length = 0;
}
void Advance() {
s64 cycles_executed = g_slice_length - down_count;
MoveEvents();
int cycles_executed = slice_length - downcount;
global_timer += cycles_executed;
down_count = g_slice_length;
slice_length = MAX_SLICE_LENGTH;
if (has_ts_events)
MoveEvents();
ProcessFifoWaitEvents();
is_global_timer_sane = true;
if (!first) {
if (g_slice_length < 10000) {
g_slice_length += 10000;
down_count += g_slice_length;
}
} else {
// Note that events can eat cycles as well.
int target = (int)(first->time - global_timer);
if (target > MAX_SLICE_LENGTH)
target = MAX_SLICE_LENGTH;
const int diff = target - g_slice_length;
g_slice_length += diff;
down_count += diff;
}
if (advance_callback)
advance_callback(static_cast<int>(cycles_executed));
}
void LogPendingEvents() {
Event* event = first;
while (event) {
// LOG_TRACE(Core_Timing, "PENDING: Now: %lld Pending: %lld Type: %d", globalTimer,
// next->time, next->type);
event = event->next;
}
}
void Idle(int max_idle) {
s64 cycles_down = down_count;
if (max_idle != 0 && cycles_down > max_idle)
cycles_down = max_idle;
if (first && cycles_down > 0) {
s64 cycles_executed = g_slice_length - down_count;
s64 cycles_next_event = first->time - global_timer;
if (cycles_next_event < cycles_executed + cycles_down) {
cycles_down = cycles_next_event - cycles_executed;
// Now, now... no time machines, please.
if (cycles_down < 0)
cycles_down = 0;
}
while (!event_queue.empty() && event_queue.front().time <= global_timer) {
Event evt = std::move(event_queue.front());
std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<Event>());
event_queue.pop_back();
evt.type->callback(evt.userdata, global_timer - evt.time);
}
LOG_TRACE(Core_Timing, "Idle for %" PRId64 " cycles! (%f ms)", cycles_down,
cycles_down / (float)(g_clock_rate_arm11 * 0.001f));
is_global_timer_sane = false;
idled_cycles += cycles_down;
down_count -= cycles_down;
if (down_count == 0)
down_count = -1;
}
std::string GetScheduledEventsSummary() {
Event* event = first;
std::string text = "Scheduled events\n";
text.reserve(1000);
while (event) {
unsigned int t = event->type;
if (t >= event_types.size())
LOG_ERROR(Core_Timing, "Invalid event type"); // %i", t);
const char* name = event_types[event->type].name;
if (!name)
name = "[unknown]";
text += Common::StringFromFormat("%s : %i %08x%08x\n", name, (int)event->time,
(u32)(event->userdata >> 32), (u32)(event->userdata));
event = event->next;
// Still events left (scheduled in the future)
if (!event_queue.empty()) {
slice_length = static_cast<int>(
std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH));
}
return text;
downcount = slice_length;
}
} // namespace
// Gives up the rest of the current slice: the cycles still left in downcount are added
// to the idle-cycle total and downcount is set to 0.
void Idle() {
idled_cycles += downcount;
downcount = 0;
}
/// Returns the time elapsed according to GetTicks(), converted to microseconds at
/// BASE_CLOCK_RATE.
u64 GetGlobalTimeUs() {
    // Multiplying the whole tick count by 1000000 wraps around u64 after roughly 13 hours
    // worth of ticks. Converting the whole seconds and the sub-second remainder
    // separately gives the same truncated result without the large intermediate product.
    const u64 ticks = GetTicks();
    const u64 whole_seconds = ticks / BASE_CLOCK_RATE;
    const u64 leftover_ticks = ticks % BASE_CLOCK_RATE;
    return whole_seconds * 1000000 + leftover_ticks * 1000000 / BASE_CLOCK_RATE;
}
// Returns the current value of downcount, i.e. the cycles left in the running slice.
int GetDowncount() {
return downcount;
}
} // namespace CoreTiming

@ -1,144 +1,191 @@
// Copyright (c) 2012- PPSSPP Project / Dolphin Project.
// Licensed under GPLv2 or any later version
// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
/**
* This is a system to schedule events into the emulated machine's future. Time is measured
* in main CPU clock cycles.
*
* To schedule an event, you first have to register its type. This is where you pass in the
* callback. You then schedule events using the type id you get back.
*
* The int cyclesLate that the callbacks get is how many cycles late it was.
* So to schedule a new event on a regular basis:
* inside callback:
* ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
*/
#include <functional>
#include <limits>
#include <string>
#include "common/common_types.h"
#include "common/logging/log.h"
// This is a system to schedule events into the emulated machine's future. Time is measured
// in main CPU clock cycles.
// To schedule an event, you first have to register its type. This is where you pass in the
// callback. You then schedule events using the type id you get back.
// See HW/SystemTimers.cpp for the main part of Dolphin's usage of this scheduler.
// The int cycles_late that the callbacks get is how many cycles late it was.
// So to schedule a new event on a regular basis:
// inside callback:
// ScheduleEvent(periodInCycles - cycles_late, callback, "whatever")
constexpr int BASE_CLOCK_RATE = 383778816; // Switch clock speed is 384MHz docked
extern int g_clock_rate_arm11;
// The timing we get from the assembly is 268,111,855.956 Hz
// It is possible that this number isn't just an integer because the compiler could have
// optimized the multiplication by a multiply-by-constant division.
// Rounding to the nearest integer should be fine
constexpr u64 BASE_CLOCK_RATE = 383778816; // Switch clock speed is 384MHz docked
constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
inline s64 msToCycles(int ms) {
return (s64)g_clock_rate_arm11 / 1000 * ms;
// since ms is int there is no way to overflow
return BASE_CLOCK_RATE * static_cast<s64>(ms) / 1000;
}
inline s64 msToCycles(float ms) {
return (s64)(g_clock_rate_arm11 * ms * (0.001f));
return static_cast<s64>(BASE_CLOCK_RATE * (0.001f) * ms);
}
inline s64 msToCycles(double ms) {
return (s64)(g_clock_rate_arm11 * ms * (0.001));
return static_cast<s64>(BASE_CLOCK_RATE * (0.001) * ms);
}
inline s64 usToCycles(float us) {
return (s64)(g_clock_rate_arm11 * us * (0.000001f));
return static_cast<s64>(BASE_CLOCK_RATE * (0.000001f) * us);
}
inline s64 usToCycles(int us) {
return (g_clock_rate_arm11 / 1000000 * (s64)us);
return (BASE_CLOCK_RATE * static_cast<s64>(us) / 1000000);
}
inline s64 usToCycles(s64 us) {
return (g_clock_rate_arm11 / 1000000 * us);
if (us / 1000000 > MAX_VALUE_TO_MULTIPLY) {
LOG_ERROR(Core_Timing, "Integer overflow, use max value");
return std::numeric_limits<s64>::max();
}
if (us > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
return BASE_CLOCK_RATE * (us / 1000000);
}
return (BASE_CLOCK_RATE * us) / 1000000;
}
inline s64 usToCycles(u64 us) {
return (s64)(g_clock_rate_arm11 / 1000000 * us);
if (us / 1000000 > MAX_VALUE_TO_MULTIPLY) {
LOG_ERROR(Core_Timing, "Integer overflow, use max value");
return std::numeric_limits<s64>::max();
}
if (us > MAX_VALUE_TO_MULTIPLY) {
LOG_DEBUG(Core_Timing, "Time very big, do rounding");
return BASE_CLOCK_RATE * static_cast<s64>(us / 1000000);
}
return (BASE_CLOCK_RATE * static_cast<s64>(us)) / 1000000;
}
/// Converts a nanosecond duration given as float into cycles at BASE_CLOCK_RATE,
/// truncating the result to an integer.
inline s64 nsToCycles(float ns) {
    const float cycles_per_ns = BASE_CLOCK_RATE * (0.000000001f);
    return static_cast<s64>(cycles_per_ns * ns);
}
/// Converts a nanosecond duration given as int into cycles at BASE_CLOCK_RATE.
/// The result is truncated towards zero; negative durations give negative cycle counts.
inline s64 nsToCycles(int ns) {
    // BASE_CLOCK_RATE is unsigned; multiplying it directly with a negative value would be
    // carried out in u64 and produce a huge positive number. In s64 the product of the
    // clock rate and any int stays far away from overflow.
    return static_cast<s64>(BASE_CLOCK_RATE) * static_cast<s64>(ns) / 1000000000;
}
/**
 * Converts a signed nanosecond duration into cycles at BASE_CLOCK_RATE.
 *
 * Durations small enough to be multiplied by the clock rate without overflow are
 * converted exactly (truncated towards zero). Larger magnitudes are first rounded down
 * to whole seconds, and values that cannot be represented at all saturate at the
 * maximum s64 value. Negative durations yield negative cycle counts.
 */
inline s64 nsToCycles(s64 ns) {
    // Compare and multiply in s64. Mixing ns with the unsigned constants converts a
    // negative ns into a huge unsigned value, which made negative durations come out as
    // either 0 or the maximum s64 value.
    constexpr s64 clock_rate = static_cast<s64>(BASE_CLOCK_RATE);
    constexpr s64 max_multiplicand = static_cast<s64>(MAX_VALUE_TO_MULTIPLY);

    if (ns / 1000000000 > max_multiplicand) {
        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
        return std::numeric_limits<s64>::max();
    }
    if (ns > max_multiplicand || ns < -max_multiplicand) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
        return clock_rate * (ns / 1000000000);
    }
    return (clock_rate * ns) / 1000000000;
}
/**
 * Converts an unsigned nanosecond duration into cycles at BASE_CLOCK_RATE.
 *
 * Durations small enough to be multiplied by the clock rate without overflow are
 * converted exactly (truncated). Larger ones are first rounded down to whole seconds,
 * and values that cannot be represented saturate at the maximum s64 value.
 */
inline s64 nsToCycles(u64 ns) {
    if (ns / 1000000000 > MAX_VALUE_TO_MULTIPLY) {
        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
        return std::numeric_limits<s64>::max();
    }
    if (ns > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
        // Divide before converting to s64 (as usToCycles(u64) does): converting first
        // turns every ns above the s64 maximum into a negative number.
        return BASE_CLOCK_RATE * static_cast<s64>(ns / 1000000000);
    }
    return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
/// Converts a cycle count at BASE_CLOCK_RATE into nanoseconds (truncated).
/// The cycle count is expected to be non-negative; the unsigned result cannot represent
/// negative durations.
inline u64 cyclesToNs(s64 cycles) {
    // cycles * 1000000000 overflows s64 as soon as cycles exceeds about 9.2e9, which is
    // only a few seconds worth of cycles. Converting the whole seconds and the sub-second
    // remainder separately gives the same truncated result with small intermediates.
    const u64 ticks = static_cast<u64>(cycles);
    const u64 whole_seconds = ticks / BASE_CLOCK_RATE;
    const u64 leftover_ticks = ticks % BASE_CLOCK_RATE;
    return whole_seconds * 1000000000 + leftover_ticks * 1000000000 / BASE_CLOCK_RATE;
}
inline s64 cyclesToUs(s64 cycles) {
return cycles / (g_clock_rate_arm11 / 1000000);
return cycles * 1000000 / BASE_CLOCK_RATE;
}
inline u64 cyclesToMs(s64 cycles) {
return cycles / (g_clock_rate_arm11 / 1000);
return cycles * 1000 / BASE_CLOCK_RATE;
}
namespace CoreTiming {
/**
* CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
* required to end slice -1 and start slice 0 before the first cycle of code is executed.
*/
void Init();
void Shutdown();
typedef void (*MHzChangeCallback)();
typedef std::function<void(u64 userdata, int cycles_late)> TimedCallback;
/**
* Advance the CPU core by the specified number of ticks (e.g. to simulate CPU execution time)
* @param ticks Number of ticks to advance the CPU core
*/
void AddTicks(u64 ticks);
* This should only be called from the emu thread. If you are calling it from any other thread,
* you are doing something evil
*/
u64 GetTicks();
u64 GetIdleTicks();
u64 GetGlobalTimeUs();
void AddTicks(u64 ticks);
struct EventType;
/**
* Registers an event type with the specified name and callback
* @param name Name of the event type
* @param callback Function that will execute when this event fires
* @returns An identifier for the event type that was registered
* Returns the event_type identifier. if name is not unique, it will assert.
*/
int RegisterEvent(const char* name, TimedCallback callback);
/// For save states.
void RestoreRegisterEvent(int event_type, const char* name, TimedCallback callback);
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents();
/// userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from disk,
/// when we implement state saves.
/**
* Schedules an event to run after the specified number of cycles,
* with an optional parameter to be passed to the callback handler.
* This must be run ONLY from within the cpu thread.
* @param cycles_into_future The number of cycles after which this event will be fired
* @param event_type The event type to fire, as returned from RegisterEvent
* @param userdata Optional parameter to pass to the callback when fired
* After the first Advance, the slice lengths and the downcount will be reduced whenever an event
* is scheduled earlier than the current values.
* Scheduling from a callback will not update the downcount until the Advance() completes.
*/
void ScheduleEvent(s64 cycles_into_future, int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe(s64 cycles_into_future, int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0);
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/**
* Unschedules an event with the specified type and userdata
* @param event_type The type of event to unschedule, as returned from RegisterEvent
* @param userdata The userdata that identifies this event, as passed to ScheduleEvent
* @returns The remaining ticks until the next invocation of the event callback
* This is to be called when outside of hle threads, such as the graphics thread, wants to
* schedule things to be executed on the main thread.
* Note that this doesn't change slice_length and thus events scheduled by this might be called
* with a delay of up to MAX_SLICE_LENGTH
*/
s64 UnscheduleEvent(int event_type, u64 userdata);
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
s64 UnscheduleThreadsafeEvent(int event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void RemoveEvent(int event_type);
void RemoveThreadsafeEvent(int event_type);
void RemoveAllEvents(int event_type);
bool IsScheduled(int event_type);
/// Runs any pending events and updates downcount for the next slice of cycles
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
* the previous timing slice and begins the next one, you must Advance from the previous
* slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
* Advance() is required to initialize the slice length before the first cycle of emulated
* instructions is executed.
*/
void Advance();
void MoveEvents();
void ProcessFifoWaitEvents();
void ForceCheck();
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle(int maxIdle = 0);
void Idle();
/// Clear all pending events. This should ONLY be done on exit or state load.
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void LogPendingEvents();
void ForceExceptionCheck(s64 cycles);
/// Warning: not included in save states.
void RegisterAdvanceCallback(void (*callback)(int cycles_executed));
void RegisterMHzChangeCallback(MHzChangeCallback callback);
u64 GetGlobalTimeUs();
std::string GetScheduledEventsSummary();
int GetDowncount();
void SetClockFrequencyMHz(int cpu_mhz);
int GetClockFrequencyMHz();
extern int g_slice_length;
} // namespace
} // namespace CoreTiming

@ -26,7 +26,7 @@
namespace Kernel {
/// Event type for the thread wake up event
static int ThreadWakeupEventType;
static CoreTiming::EventType* ThreadWakeupEventType = nullptr;
bool Thread::ShouldWait(Thread* thread) const {
return status != THREADSTATUS_DEAD;
@ -265,8 +265,7 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
if (nanoseconds == -1)
return;
u64 microseconds = nanoseconds / 1000;
CoreTiming::ScheduleEvent(usToCycles(microseconds), ThreadWakeupEventType, callback_handle);
CoreTiming::ScheduleEvent(nsToCycles(nanoseconds), ThreadWakeupEventType, callback_handle);
}
void Thread::ResumeFromWait() {

@ -14,7 +14,7 @@
namespace Kernel {
/// The event type of the generic timer callback event
static int timer_callback_event_type;
static CoreTiming::EventType* timer_callback_event_type = nullptr;
// TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future, allowing
// us to simply use a pool index or similar.
static Kernel::HandleTable timer_callback_handle_table;
@ -57,9 +57,7 @@ void Timer::Set(s64 initial, s64 interval) {
// Immediately invoke the callback
Signal(0);
} else {
u64 initial_microseconds = initial / 1000;
CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type,
callback_handle);
CoreTiming::ScheduleEvent(nsToCycles(initial), timer_callback_event_type, callback_handle);
}
}
@ -88,8 +86,7 @@ void Timer::Signal(int cycles_late) {
if (interval_delay != 0) {
// Reschedule the timer with the interval delay
u64 interval_microseconds = interval_delay / 1000;
CoreTiming::ScheduleEvent(usToCycles(interval_microseconds) - cycles_late,
CoreTiming::ScheduleEvent(nsToCycles(interval_delay) - cycles_late,
timer_callback_event_type, callback_handle);
}
}

@ -14,7 +14,7 @@ namespace SharedPage {
SharedPageDef shared_page;
static int update_time_event;
static CoreTiming::EventType* update_time_event;
/// Gets system time in 3DS format. The epoch is Jan 1900, and the unit is millisecond.
static u64 GetSystemTime() {
@ -56,7 +56,7 @@ static void UpdateTimeCallback(u64 userdata, int cycles_late) {
date_time.date_time = GetSystemTime();
date_time.update_tick = CoreTiming::GetTicks();
date_time.tick_to_second_coefficient = g_clock_rate_arm11;
date_time.tick_to_second_coefficient = BASE_CLOCK_RATE;
date_time.tick_offset = 0;
++shared_page.date_time_counter;

@ -31,7 +31,7 @@ Regs g_regs;
/// 268MHz CPU clocks / 60Hz frames per second
const u64 frame_ticks = static_cast<u64>(BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
/// Event id for CoreTiming
static int vblank_event;
static CoreTiming::EventType* vblank_event;
template <typename T>
inline void Read(T& var, const u32 raw_addr) {

@ -1,6 +1,7 @@
set(SRCS
common/param_package.cpp
core/arm/arm_test_common.cpp
core/core_timing.cpp
core/file_sys/path_parser.cpp
core/memory/memory.cpp
glad.cpp

@ -0,0 +1,237 @@
// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <catch.hpp>
#include <array>
#include <bitset>
#include <string>
#include "common/file_util.h"
#include "core/core.h"
#include "core/core_timing.h"
// Numbers are chosen randomly to make sure the correct one is given.
// Each callback instantiation checks the userdata it receives against its own entry here.
static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals
// Bit N is set by the callback instantiated with index N when it runs.
static std::bitset<CB_IDS.size()> callbacks_ran_flags;
// Entry of CB_IDS that CallbackTemplate requires to match its own entry when it fires.
static u64 expected_callback = 0;
// cycles_late value the callbacks in this file require to be invoked with.
static s64 lateness = 0;
/**
 * Generic test callback, instantiated once per slot of CB_IDS.
 * Flags its slot in callbacks_ran_flags, then requires that the userdata it was handed belongs
 * to that slot, that this slot is the one currently expected to fire, and that the reported
 * lateness equals the expected lateness.
 */
template <unsigned int Slot>
void CallbackTemplate(u64 userdata, s64 cycles_late) {
    static_assert(Slot < CB_IDS.size(), "IDX out of range");

    callbacks_ran_flags.set(Slot);

    const u64 own_id = CB_IDS[Slot];
    REQUIRE(own_id == userdata);
    REQUIRE(own_id == expected_callback);
    REQUIRE(lateness == cycles_late);
}
/**
 * RAII guard for the test cases: calls CoreTiming::Init() on construction and
 * CoreTiming::Shutdown() on destruction.
 * Copying is disabled, since a copied guard would call CoreTiming::Shutdown() a second time
 * when it is destroyed.
 */
class ScopeInit final {
public:
    ScopeInit() {
        CoreTiming::Init();
    }
    ~ScopeInit() {
        CoreTiming::Shutdown();
    }

    ScopeInit(const ScopeInit&) = delete;
    ScopeInit& operator=(const ScopeInit&) = delete;
};
/**
 * Adds ticks up to (or past) the current downcount, calls Advance() and requires that exactly
 * one callback fired.
 * @param idx Index into CB_IDS of the only callback expected to run
 * @param downcount Value GetDowncount() is required to report after the Advance()
 * @param expected_lateness cycles_late value the callback is required to receive
 * @param cpu_downcount Subtracted from the current downcount to obtain the ticks to add; a
 *                      negative value therefore adds more ticks than the downcount
 */
static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
                            int cpu_downcount = 0) {
    // Arm the expectations the callbacks verify against.
    callbacks_ran_flags.reset();
    expected_callback = CB_IDS[idx];
    lateness = expected_lateness;

    // Pretend we executed X cycles of instructions.
    const auto executed_ticks = CoreTiming::GetDowncount() - cpu_downcount;
    CoreTiming::AddTicks(executed_ticks);
    CoreTiming::Advance();

    // Only the bit belonging to idx may be set.
    std::bitset<CB_IDS.size()> only_idx;
    only_idx.set(idx);
    REQUIRE(only_idx == callbacks_ran_flags);
    REQUIRE(downcount == CoreTiming::GetDowncount());
}
// Schedules five events out of order and requires that they fire sorted by their deadline.
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
    ScopeInit guard;

    CoreTiming::EventType* const event_a =
        CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* const event_b =
        CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    CoreTiming::EventType* const event_c =
        CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
    CoreTiming::EventType* const event_d =
        CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
    CoreTiming::EventType* const event_e =
        CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);

    // Enter slice 0
    CoreTiming::Advance();

    // Expected firing order: D -> B -> C -> A -> E
    // After each call the downcount is required to equal the smallest delay scheduled so far.
    CoreTiming::ScheduleEvent(1000, event_a, CB_IDS[0]);
    REQUIRE(1000 == CoreTiming::GetDowncount());

    CoreTiming::ScheduleEvent(500, event_b, CB_IDS[1]);
    REQUIRE(500 == CoreTiming::GetDowncount());

    CoreTiming::ScheduleEvent(800, event_c, CB_IDS[2]);
    REQUIRE(500 == CoreTiming::GetDowncount());

    CoreTiming::ScheduleEvent(100, event_d, CB_IDS[3]);
    REQUIRE(100 == CoreTiming::GetDowncount());

    CoreTiming::ScheduleEvent(1200, event_e, CB_IDS[4]);
    REQUIRE(100 == CoreTiming::GetDowncount());

    // Second argument: downcount required after the callback has run.
    AdvanceAndCheck(3, 400);              // D
    AdvanceAndCheck(1, 300);              // B
    AdvanceAndCheck(2, 200);              // C
    AdvanceAndCheck(0, 200);              // A
    AdvanceAndCheck(4, MAX_SLICE_LENGTH); // E
}
// Same ordering scenario as the basic test, but scheduled through ScheduleEventThreadsafe.
TEST_CASE("CoreTiming[Threadsave]", "[core]") {
    ScopeInit guard;

    CoreTiming::EventType* const event_a =
        CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* const event_b =
        CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
    CoreTiming::EventType* const event_c =
        CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
    CoreTiming::EventType* const event_d =
        CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
    CoreTiming::EventType* const event_e =
        CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);

    // Enter slice 0
    CoreTiming::Advance();

    // Manually force the exception check since ScheduleEventThreadsafe doesn't call it.
    const auto schedule_and_force = [](int cycles, CoreTiming::EventType* event, u64 userdata) {
        CoreTiming::ScheduleEventThreadsafe(cycles, event, userdata);
        CoreTiming::ForceExceptionCheck(cycles);
    };

    // Expected firing order: D -> B -> C -> A -> E
    schedule_and_force(1000, event_a, CB_IDS[0]);
    REQUIRE(1000 == CoreTiming::GetDowncount());

    schedule_and_force(500, event_b, CB_IDS[1]);
    REQUIRE(500 == CoreTiming::GetDowncount());

    schedule_and_force(800, event_c, CB_IDS[2]);
    REQUIRE(500 == CoreTiming::GetDowncount());

    schedule_and_force(100, event_d, CB_IDS[3]);
    REQUIRE(100 == CoreTiming::GetDowncount());

    schedule_and_force(1200, event_e, CB_IDS[4]);
    REQUIRE(100 == CoreTiming::GetDowncount());

    AdvanceAndCheck(3, 400);              // D
    AdvanceAndCheck(1, 300);              // B
    AdvanceAndCheck(2, 200);              // C
    AdvanceAndCheck(0, 200);              // A
    AdvanceAndCheck(4, MAX_SLICE_LENGTH); // E
}
namespace SharedSlotTest {

/// Number of FifoCallback invocations so far; doubles as the ID expected to fire next.
static unsigned int counter = 0;

/**
 * Callback for events that share one timestamp. Flags its slot, then requires the matching
 * userdata, that it fires in ascending ID order (ID == counter) and the expected lateness.
 */
template <unsigned int Slot>
void FifoCallback(u64 userdata, s64 cycles_late) {
    static_assert(Slot < CB_IDS.size(), "ID out of range");

    callbacks_ran_flags.set(Slot);

    REQUIRE(CB_IDS[Slot] == userdata);
    REQUIRE(Slot == counter);
    REQUIRE(lateness == cycles_late);

    counter += 1;
}

} // namespace SharedSlotTest
// Five events scheduled for the same cycle must all fire in one Advance(), in scheduling order.
TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
    using namespace SharedSlotTest;

    ScopeInit guard;

    CoreTiming::EventType* const event_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
    CoreTiming::EventType* const event_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
    CoreTiming::EventType* const event_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
    CoreTiming::EventType* const event_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
    CoreTiming::EventType* const event_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>);

    // Unlike the other tests, everything is scheduled before slice 0 is entered.
    CoreTiming::ScheduleEvent(1000, event_a, CB_IDS[0]);
    CoreTiming::ScheduleEvent(1000, event_b, CB_IDS[1]);
    CoreTiming::ScheduleEvent(1000, event_c, CB_IDS[2]);
    CoreTiming::ScheduleEvent(1000, event_d, CB_IDS[3]);
    CoreTiming::ScheduleEvent(1000, event_e, CB_IDS[4]);

    // Enter slice 0
    CoreTiming::Advance();
    REQUIRE(1000 == CoreTiming::GetDowncount());

    // Reset the state FifoCallback checks against.
    callbacks_ran_flags.reset();
    counter = 0;
    lateness = 0;

    const auto remaining_ticks = CoreTiming::GetDowncount();
    CoreTiming::AddTicks(remaining_ticks);
    CoreTiming::Advance();

    REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
    // All five slots flagged.
    REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
}
// Overshooting an event by N cycles must be reported to its callback as a lateness of N.
TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
    ScopeInit guard;

    CoreTiming::EventType* const event_a =
        CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
    CoreTiming::EventType* const event_b =
        CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);

    // Enter slice 0
    CoreTiming::Advance();

    CoreTiming::ScheduleEvent(100, event_a, CB_IDS[0]);
    CoreTiming::ScheduleEvent(200, event_b, CB_IDS[1]);

    // Overshoot A by 10 cycles; the downcount to B is then (100 - 10).
    AdvanceAndCheck(0, 90, 10, -10);
    // Overshoot B by 50 cycles; the full slice length is required afterwards.
    AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
}
namespace ChainSchedulingTest {

/// Remaining number of times RescheduleCallback is allowed to fire.
static int reschedules = 0;

/**
 * Callback that schedules itself again 1000 cycles later until the budget in `reschedules`
 * is used up. The userdata carries the callback's own EventType pointer.
 */
static void RescheduleCallback(u64 userdata, s64 cycles_late) {
    --reschedules;
    REQUIRE(reschedules >= 0);
    REQUIRE(lateness == cycles_late);

    if (reschedules <= 0)
        return;

    CoreTiming::EventType* const own_event = reinterpret_cast<CoreTiming::EventType*>(userdata);
    CoreTiming::ScheduleEvent(1000, own_event, userdata);
}

} // namespace ChainSchedulingTest
// Checks that an event which schedules itself again from inside its own callback is
// interleaved with the other pending events as required by the downcount checks below.
TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
using namespace ChainSchedulingTest;
ScopeInit guard;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
CoreTiming::EventType* cb_rs =
CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback);
// Enter slice 0
CoreTiming::Advance();
CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]);
CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]);
// cb_rs receives its own EventType pointer as userdata so the callback can re-schedule itself.
CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
REQUIRE(800 == CoreTiming::GetDowncount());
// RescheduleCallback decrements this on every invocation and re-schedules while it stays > 0.
reschedules = 3;
AdvanceAndCheck(0, 200); // cb_a
AdvanceAndCheck(1, 1000); // cb_b, cb_rs
REQUIRE(2 == reschedules);
// Advanced by hand here; only RescheduleCallback is expected to run in this step.
CoreTiming::AddTicks(CoreTiming::GetDowncount());
CoreTiming::Advance(); // cb_rs
REQUIRE(1 == reschedules);
REQUIRE(200 == CoreTiming::GetDowncount());
AdvanceAndCheck(2, 800); // cb_c
CoreTiming::AddTicks(CoreTiming::GetDowncount());
CoreTiming::Advance(); // cb_rs
// Budget used up: no further re-schedule, so the full slice length is required.
REQUIRE(0 == reschedules);
REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
}