Merge pull request #3986 from ReinUsesLisp/shader-cache
shader_cache: Implement a generic runtime shader cache
commit
5633887569
@ -1,7 +0,0 @@
|
|||||||
// Copyright 2018 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#include "video_core/rasterizer_cache.h"
|
|
||||||
|
|
||||||
// Out-of-line definition of the defaulted virtual destructor. Defining it in this
// translation unit (rather than `= default` in the header) gives the polymorphic
// RasterizerCacheObject a key function, so its vtable/RTTI are emitted here once
// instead of in every translation unit that includes the header.
RasterizerCacheObject::~RasterizerCacheObject() = default;
|
|
@ -1,253 +0,0 @@
|
|||||||
// Copyright 2018 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <mutex>
|
|
||||||
#include <set>
|
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include <boost/icl/interval_map.hpp>
|
|
||||||
#include <boost/range/iterator_range_core.hpp>
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
|
||||||
#include "core/settings.h"
|
|
||||||
#include "video_core/gpu.h"
|
|
||||||
#include "video_core/rasterizer_interface.h"
|
|
||||||
|
|
||||||
class RasterizerCacheObject {
|
|
||||||
public:
|
|
||||||
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
|
|
||||||
|
|
||||||
virtual ~RasterizerCacheObject();
|
|
||||||
|
|
||||||
VAddr GetCpuAddr() const {
|
|
||||||
return cpu_addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets the size of the shader in guest memory, required for cache management
|
|
||||||
virtual std::size_t GetSizeInBytes() const = 0;
|
|
||||||
|
|
||||||
/// Sets whether the cached object should be considered registered
|
|
||||||
void SetIsRegistered(bool registered) {
|
|
||||||
is_registered = registered;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if the cached object is registered
|
|
||||||
bool IsRegistered() const {
|
|
||||||
return is_registered;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if the cached object is dirty
|
|
||||||
bool IsDirty() const {
|
|
||||||
return is_dirty;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns ticks from when this cached object was last modified
|
|
||||||
u64 GetLastModifiedTicks() const {
|
|
||||||
return last_modified_ticks;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Marks an object as recently modified, used to specify whether it is clean or dirty
|
|
||||||
template <class T>
|
|
||||||
void MarkAsModified(bool dirty, T& cache) {
|
|
||||||
is_dirty = dirty;
|
|
||||||
last_modified_ticks = cache.GetModifiedTicks();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetMemoryMarked(bool is_memory_marked_) {
|
|
||||||
is_memory_marked = is_memory_marked_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsMemoryMarked() const {
|
|
||||||
return is_memory_marked;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetSyncPending(bool is_sync_pending_) {
|
|
||||||
is_sync_pending = is_sync_pending_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsSyncPending() const {
|
|
||||||
return is_sync_pending;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
|
||||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
|
||||||
bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
|
|
||||||
bool is_sync_pending{}; ///< Whether the object is pending deletion.
|
|
||||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
|
||||||
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Generic cache of rasterizer objects keyed by guest CPU address. Objects are
/// tracked both in a hash map (exact start-address lookup) and a Boost.ICL
/// interval map (range overlap queries). T is expected to be a pointer-like
/// handle to a RasterizerCacheObject-derived type.
///
/// Locking: `mutex` is recursive on purpose — e.g. InvalidateRegion() holds the
/// lock and then calls Unregister(), which locks it again.
template <class T>
class RasterizerCache : NonCopyable {
    friend class RasterizerCacheObject;

public:
    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}

    /// Write any cached resources overlapping the specified region back to memory
    void FlushRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        // Flush in last-modified order so writes reach guest memory in order.
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
        for (auto& object : objects) {
            FlushObject(object);
        }
    }

    /// Mark the specified region as being invalidated, unregistering every
    /// overlapping object from the cache
    void InvalidateRegion(VAddr addr, u64 size) {
        std::lock_guard lock{mutex};

        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
        for (auto& object : objects) {
            if (!object->IsRegistered()) {
                // Skip duplicates
                continue;
            }
            Unregister(object);
        }
    }

    /// Reacts to a guest CPU write: unmarks overlapping objects from rasterizer
    /// memory immediately, but defers the actual unregister to SyncGuestHost()
    void OnCPUWrite(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
            if (object->IsRegistered()) {
                UnmarkMemory(object);
                object->SetSyncPending(true);
                marked_for_unregister.emplace_back(object);
            }
        }
    }

    /// Flushes the deferred unregisters queued by OnCPUWrite()
    void SyncGuestHost() {
        std::lock_guard lock{mutex};

        for (const auto& object : marked_for_unregister) {
            if (object->IsRegistered()) {
                object->SetSyncPending(false);
                Unregister(object);
            }
        }
        marked_for_unregister.clear();
    }

    /// Invalidates everything in the cache
    void InvalidateAll() {
        std::lock_guard lock{mutex};

        // Unregister() erases from interval_cache, so iterate until it drains.
        while (interval_cache.begin() != interval_cache.end()) {
            Unregister(*interval_cache.begin()->second.begin());
        }
    }

protected:
    /// Tries to get an object from the cache with the specified cache address
    /// @return The cached object, or nullptr when addr is not an exact start address
    T TryGet(VAddr addr) const {
        const auto iter = map_cache.find(addr);
        if (iter != map_cache.end())
            return iter->second;
        return nullptr;
    }

    /// Register an object into the cache
    virtual void Register(const T& object) {
        std::lock_guard lock{mutex};

        object->SetIsRegistered(true);
        interval_cache.add({GetInterval(object), ObjectSet{object}});
        map_cache.insert({object->GetCpuAddr(), object});
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
        object->SetMemoryMarked(true);
    }

    /// Unregisters an object from the cache
    virtual void Unregister(const T& object) {
        std::lock_guard lock{mutex};

        UnmarkMemory(object);
        object->SetIsRegistered(false);
        // If a deferred unregister was queued for this object, cancel it so
        // SyncGuestHost() does not see a stale entry.
        if (object->IsSyncPending()) {
            marked_for_unregister.remove(object);
            object->SetSyncPending(false);
        }
        const VAddr addr = object->GetCpuAddr();
        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
        map_cache.erase(addr);
    }

    /// Decrements the rasterizer cached-page count for the object, if it is
    /// currently marked. Safe to call more than once.
    void UnmarkMemory(const T& object) {
        if (!object->IsMemoryMarked()) {
            return;
        }
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
        object->SetMemoryMarked(false);
    }

    /// Returns a ticks counter used for tracking when cached objects were last modified
    u64 GetModifiedTicks() {
        std::lock_guard lock{mutex};

        return ++modified_ticks;
    }

    /// Implemented by subclasses to perform the actual flush of a dirty object
    virtual void FlushObjectInner(const T& object) = 0;

    /// Flushes the specified object, updating appropriate cache state as needed
    void FlushObject(const T& object) {
        std::lock_guard lock{mutex};

        if (!object->IsDirty()) {
            return;
        }
        FlushObjectInner(object);
        object->MarkAsModified(false, *this);
    }

    std::recursive_mutex mutex; ///< Recursive: public methods re-enter via Register/Unregister

private:
    /// Returns a list of cached objects from the specified memory region, ordered by access time
    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
        if (size == 0) {
            return {};
        }

        std::vector<T> objects;
        const ObjectInterval interval{addr, addr + size};
        for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
            for (auto& cached_object : pair.second) {
                if (!cached_object) {
                    continue;
                }
                // NOTE: an object spanning several intervals appears once per
                // interval; callers tolerate duplicates (see InvalidateRegion).
                objects.push_back(cached_object);
            }
        }

        std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
            return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
        });

        return objects;
    }

    using ObjectSet = std::set<T>;
    using ObjectCache = std::unordered_map<VAddr, T>;
    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
    using ObjectInterval = typename IntervalCache::interval_type;

    /// Builds the [start, end) address interval covered by an object
    static auto GetInterval(const T& object) {
        return ObjectInterval::right_open(object->GetCpuAddr(),
                                          object->GetCpuAddr() + object->GetSizeInBytes());
    }

    ObjectCache map_cache;                      ///< Exact start-address lookup
    IntervalCache interval_cache;               ///< Cache of objects, for range overlap queries
    u64 modified_ticks{};                       ///< Counter of cache state ticks, used for in-order flushing
    VideoCore::RasterizerInterface& rasterizer; ///< Notified of cached-page count changes
    std::list<T> marked_for_unregister;         ///< Objects queued by OnCPUWrite() for SyncGuestHost()
};
|
|
@ -0,0 +1,228 @@
|
|||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class ShaderCache {
|
||||||
|
static constexpr u64 PAGE_SHIFT = 14;
|
||||||
|
|
||||||
|
struct Entry {
|
||||||
|
VAddr addr_start;
|
||||||
|
VAddr addr_end;
|
||||||
|
T* data;
|
||||||
|
|
||||||
|
bool is_memory_marked = true;
|
||||||
|
|
||||||
|
constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
|
||||||
|
return start < addr_end && addr_start < end;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~ShaderCache() = default;
|
||||||
|
|
||||||
|
/// @brief Removes shaders inside a given region
|
||||||
|
/// @note Checks for ranges
|
||||||
|
/// @param addr Start address of the invalidation
|
||||||
|
/// @param size Number of bytes of the invalidation
|
||||||
|
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||||
|
std::scoped_lock lock{invalidation_mutex};
|
||||||
|
InvalidatePagesInRegion(addr, size);
|
||||||
|
RemovePendingShaders();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||||
|
/// @param addr Start address of the CPU write operation
|
||||||
|
/// @param size Number of bytes of the CPU write operation
|
||||||
|
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||||
|
std::lock_guard lock{invalidation_mutex};
|
||||||
|
InvalidatePagesInRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Flushes delayed removal operations
|
||||||
|
void SyncGuestHost() {
|
||||||
|
std::scoped_lock lock{invalidation_mutex};
|
||||||
|
RemovePendingShaders();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Tries to obtain a cached shader starting in a given address
|
||||||
|
/// @note Doesn't check for ranges, the given address has to be the start of the shader
|
||||||
|
/// @param addr Start address of the shader, this doesn't cache for region
|
||||||
|
/// @return Pointer to a valid shader, nullptr when nothing is found
|
||||||
|
T* TryGet(VAddr addr) const {
|
||||||
|
std::scoped_lock lock{lookup_mutex};
|
||||||
|
|
||||||
|
const auto it = lookup_cache.find(addr);
|
||||||
|
if (it == lookup_cache.end()) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
return it->second->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
|
||||||
|
|
||||||
|
/// @brief Register in the cache a given entry
|
||||||
|
/// @param data Shader to store in the cache
|
||||||
|
/// @param addr Start address of the shader that will be registered
|
||||||
|
/// @param size Size in bytes of the shader
|
||||||
|
void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
|
||||||
|
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
|
||||||
|
|
||||||
|
const VAddr addr_end = addr + size;
|
||||||
|
Entry* const entry = NewEntry(addr, addr_end, data.get());
|
||||||
|
|
||||||
|
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||||
|
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||||
|
invalidation_cache[page].push_back(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
storage.push_back(std::move(data));
|
||||||
|
|
||||||
|
rasterizer.UpdatePagesCachedCount(addr, size, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Called when a shader is going to be removed
|
||||||
|
/// @param shader Shader that will be removed
|
||||||
|
/// @pre invalidation_cache is locked
|
||||||
|
/// @pre lookup_mutex is locked
|
||||||
|
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// @brief Invalidate pages in a given region
|
||||||
|
/// @pre invalidation_mutex is locked
|
||||||
|
void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
|
||||||
|
const VAddr addr_end = addr + size;
|
||||||
|
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||||
|
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||||
|
const auto it = invalidation_cache.find(page);
|
||||||
|
if (it == invalidation_cache.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Entry*>& entries = it->second;
|
||||||
|
InvalidatePageEntries(entries, addr, addr_end);
|
||||||
|
|
||||||
|
// If there's nothing else in this page, remove it to avoid overpopulating the hash map.
|
||||||
|
if (entries.empty()) {
|
||||||
|
invalidation_cache.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Remove shaders marked for deletion
|
||||||
|
/// @pre invalidation_mutex is locked
|
||||||
|
void RemovePendingShaders() {
|
||||||
|
if (marked_for_removal.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::scoped_lock lock{lookup_mutex};
|
||||||
|
|
||||||
|
std::vector<T*> removed_shaders;
|
||||||
|
removed_shaders.reserve(marked_for_removal.size());
|
||||||
|
|
||||||
|
for (Entry* const entry : marked_for_removal) {
|
||||||
|
if (lookup_cache.erase(entry->addr_start) > 0) {
|
||||||
|
removed_shaders.push_back(entry->data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
marked_for_removal.clear();
|
||||||
|
|
||||||
|
if (!removed_shaders.empty()) {
|
||||||
|
RemoveShadersFromStorage(std::move(removed_shaders));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Invalidates entries in a given range for the passed page
|
||||||
|
/// @param entries Vector of entries in the page, it will be modified on overlaps
|
||||||
|
/// @param addr Start address of the invalidation
|
||||||
|
/// @param addr_end Non-inclusive end address of the invalidation
|
||||||
|
/// @pre invalidation_mutex is locked
|
||||||
|
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
|
||||||
|
auto it = entries.begin();
|
||||||
|
while (it != entries.end()) {
|
||||||
|
Entry* const entry = *it;
|
||||||
|
if (!entry->Overlaps(addr, addr_end)) {
|
||||||
|
++it;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
UnmarkMemory(entry);
|
||||||
|
marked_for_removal.push_back(entry);
|
||||||
|
|
||||||
|
it = entries.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Unmarks an entry from the rasterizer cache
|
||||||
|
/// @param entry Entry to unmark from memory
|
||||||
|
void UnmarkMemory(Entry* entry) {
|
||||||
|
if (!entry->is_memory_marked) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
entry->is_memory_marked = false;
|
||||||
|
|
||||||
|
const VAddr addr = entry->addr_start;
|
||||||
|
const std::size_t size = entry->addr_end - addr;
|
||||||
|
rasterizer.UpdatePagesCachedCount(addr, size, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Removes a vector of shaders from a list
|
||||||
|
/// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
|
||||||
|
/// @pre invalidation_mutex is locked
|
||||||
|
/// @pre lookup_mutex is locked
|
||||||
|
void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
|
||||||
|
// Remove duplicates
|
||||||
|
std::sort(removed_shaders.begin(), removed_shaders.end());
|
||||||
|
removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
|
||||||
|
removed_shaders.end());
|
||||||
|
|
||||||
|
// Now that there are no duplicates, we can notify removals
|
||||||
|
for (T* const shader : removed_shaders) {
|
||||||
|
OnShaderRemoval(shader);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove them from the cache
|
||||||
|
const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
|
||||||
|
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
|
||||||
|
removed_shaders.end();
|
||||||
|
};
|
||||||
|
storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Creates a new entry in the lookup cache and returns its pointer
|
||||||
|
/// @pre lookup_mutex is locked
|
||||||
|
Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
|
||||||
|
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
|
||||||
|
Entry* const entry_pointer = entry.get();
|
||||||
|
|
||||||
|
lookup_cache.emplace(addr, std::move(entry));
|
||||||
|
return entry_pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
||||||
|
mutable std::mutex lookup_mutex;
|
||||||
|
std::mutex invalidation_mutex;
|
||||||
|
|
||||||
|
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
|
||||||
|
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
|
||||||
|
std::vector<std::unique_ptr<T>> storage;
|
||||||
|
std::vector<Entry*> marked_for_removal;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCommon
|
Loading…
Reference in New Issue