Merge pull request #3677 from FernandoS27/better-sync

Introduce Predictive Flushing and Improve ASYNC GPU
merge-requests/60/head
bunnei 2020-04-22 22:09:38 +07:00 committed by GitHub
commit bf2ddb8fd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 1193 additions and 63 deletions

@ -92,7 +92,7 @@ void LogSettings() {
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
LogSetting("Renderer_UseAsynchronousGpuEmulation", LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation); Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseVsync", Settings::values.use_vsync); LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
@ -109,4 +109,12 @@ void LogSettings() {
LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
} }
/// Tells whether the configured GPU accuracy is exactly GPUAccuracy::Extreme.
bool IsGPULevelExtreme() {
    const bool is_extreme = (GPUAccuracy::Extreme == values.gpu_accuracy);
    return is_extreme;
}
/// Tells whether the configured GPU accuracy is GPUAccuracy::High or GPUAccuracy::Extreme.
bool IsGPULevelHigh() {
    switch (values.gpu_accuracy) {
    case GPUAccuracy::High:
    case GPUAccuracy::Extreme:
        return true;
    default:
        return false;
    }
}
} // namespace Settings } // namespace Settings

@ -376,6 +376,12 @@ enum class RendererBackend {
Vulkan = 1, Vulkan = 1,
}; };
/// Accuracy level selected for GPU emulation. Settings::IsGPULevelHigh() accepts both High and
/// Extreme, while Settings::IsGPULevelExtreme() accepts only Extreme.
enum class GPUAccuracy : u32 {
Normal = 0,
High = 1,
Extreme = 2,
};
struct Values { struct Values {
// System // System
bool use_docked_mode; bool use_docked_mode;
@ -436,7 +442,7 @@ struct Values {
bool use_frame_limit; bool use_frame_limit;
u16 frame_limit; u16 frame_limit;
bool use_disk_shader_cache; bool use_disk_shader_cache;
bool use_accurate_gpu_emulation; GPUAccuracy gpu_accuracy;
bool use_asynchronous_gpu_emulation; bool use_asynchronous_gpu_emulation;
bool use_vsync; bool use_vsync;
bool force_30fps_mode; bool force_30fps_mode;
@ -480,6 +486,9 @@ struct Values {
std::map<u64, std::vector<std::string>> disabled_addons; std::map<u64, std::vector<std::string>> disabled_addons;
} extern values; } extern values;
bool IsGPULevelExtreme();
bool IsGPULevelHigh();
void Apply(); void Apply();
void LogSettings(); void LogSettings();
} // namespace Settings } // namespace Settings

@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
return "Unknown"; return "Unknown";
} }
/// Converts a GPU accuracy level into the string attached to the "Renderer_GPUAccuracyLevel"
/// telemetry field.
/// @param level Accuracy level to translate.
/// @return "Normal", "High" or "Extreme"; "Unknown" for any value outside the enumeration.
static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy level) {
    // No default label on purpose: every enumerator is listed explicitly and values outside the
    // enumeration fall through to the "Unknown" return below.
    switch (level) {
    case Settings::GPUAccuracy::Normal:
        return "Normal";
    case Settings::GPUAccuracy::High:
        return "High";
    case Settings::GPUAccuracy::Extreme:
        return "Extreme";
    }
    return "Unknown";
}
u64 GetTelemetryId() { u64 GetTelemetryId() {
u64 telemetry_id{}; u64 telemetry_id{};
const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
AddField(field_type, "Renderer_UseAccurateGpuEmulation", AddField(field_type, "Renderer_GPUAccuracyLevel",
Settings::values.use_accurate_gpu_emulation); TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation); Settings::values.use_asynchronous_gpu_emulation);
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);

@ -23,6 +23,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h engines/shader_bytecode.h
engines/shader_header.h engines/shader_header.h
engines/shader_type.h engines/shader_type.h
fence_manager.h
gpu.cpp gpu.cpp
gpu.h gpu.h
gpu_asynch.cpp gpu_asynch.cpp
@ -51,6 +52,8 @@ add_library(video_core STATIC
renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_framebuffer_cache.cpp renderer_opengl/gl_framebuffer_cache.cpp
renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.cpp
@ -176,6 +179,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h renderer_vulkan/vk_device.h
renderer_vulkan/vk_fence_manager.cpp
renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp renderer_vulkan/vk_image.cpp

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include <list>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <unordered_map> #include <unordered_map>
@ -18,8 +19,10 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "core/settings.h"
#include "video_core/buffer_cache/buffer_block.h" #include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h" #include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -79,6 +82,9 @@ public:
auto map = MapAddress(block, gpu_addr, cpu_addr, size); auto map = MapAddress(block, gpu_addr, cpu_addr, size);
if (is_written) { if (is_written) {
map->MarkAsModified(true, GetModifiedTicks()); map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(map);
}
if (!map->IsWritten()) { if (!map->IsWritten()) {
map->MarkAsWritten(true); map->MarkAsWritten(true);
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@ -137,11 +143,22 @@ public:
}); });
for (auto& object : objects) { for (auto& object : objects) {
if (object->IsModified() && object->IsRegistered()) { if (object->IsModified() && object->IsRegistered()) {
mutex.unlock();
FlushMap(object); FlushMap(object);
mutex.lock();
} }
} }
} }
/// Returns true when at least one map reported by GetMapsInRange(addr, size) is both modified
/// and still registered.
bool MustFlushRegion(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};
    for (const MapInterval& candidate : GetMapsInRange(addr, size)) {
        if (candidate->IsModified() && candidate->IsRegistered()) {
            return true;
        }
    }
    return false;
}
/// Mark the specified region as being invalidated /// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) { void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
@ -154,6 +171,77 @@ public:
} }
} }
/// Handles a CPU write to the given range: every registered map in the range that still has its
/// memory marked is unmarked, flagged as sync pending and queued in marked_for_unregister.
void OnCPUWrite(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};
    for (const auto& touched : GetMapsInRange(addr, size)) {
        if (!touched->IsMemoryMarked() || !touched->IsRegistered()) {
            continue;
        }
        UnmarkMemory(touched);
        touched->SetSyncPending(true);
        marked_for_unregister.emplace_back(touched);
    }
}
/// Unregisters every map queued in marked_for_unregister that is still registered, then empties
/// the queue.
void SyncGuestHost() {
    std::lock_guard lock{mutex};
    for (const auto& pending : marked_for_unregister) {
        if (!pending->IsRegistered()) {
            continue;
        }
        // The pending flag is cleared before unregistering, as in the original statement order.
        pending->SetSyncPending(false);
        Unregister(pending);
    }
    marked_for_unregister.clear();
}
/// Moves the uncommitted flush set into the committed queue. Exactly one entry is appended per
/// call: a list of the maps that are still registered and modified, or a null entry when there
/// is no such map.
void CommitAsyncFlushes() {
    std::shared_ptr<std::list<MapInterval>> entry;
    if (uncommitted_flushes) {
        auto candidates = std::make_shared<std::list<MapInterval>>();
        for (const auto& map : *uncommitted_flushes) {
            if (map->IsRegistered() && map->IsModified()) {
                // TODO(Blinkhawk): Implement backend asynchronous flushing
                // AsyncFlushMap(map)
                candidates->push_back(map);
            }
        }
        if (!candidates->empty()) {
            entry = candidates;
        }
    }
    // A null entry keeps the committed queue in step with the number of commits.
    committed_flushes.push_back(entry);
    uncommitted_flushes.reset();
}
/// Returns true when the oldest committed entry exists and holds a flush list (is not null).
bool ShouldWaitAsyncFlushes() const {
    if (committed_flushes.empty()) {
        return false;
    }
    return committed_flushes.front() != nullptr;
}
/// Returns true when at least one map has been marked for asynchronous flushing since the last
/// commit.
bool HasUncommittedFlushes() const {
    return static_cast<bool>(uncommitted_flushes);
}
void PopAsyncFlushes() {
if (committed_flushes.empty()) {
return;
}
auto& flush_list = committed_flushes.front();
if (!flush_list) {
committed_flushes.pop_front();
return;
}
for (MapInterval& map : *flush_list) {
if (map->IsRegistered()) {
// TODO(Blinkhawk): Replace this for reading the asynchronous flush
FlushMap(map);
}
}
committed_flushes.pop_front();
}
virtual BufferType GetEmptyBuffer(std::size_t size) = 0; virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
protected: protected:
@ -196,17 +284,30 @@ protected:
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
mapped_addresses.insert({interval, new_map}); mapped_addresses.insert({interval, new_map});
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
new_map->SetMemoryMarked(true);
if (inherit_written) { if (inherit_written) {
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
new_map->MarkAsWritten(true); new_map->MarkAsWritten(true);
} }
} }
/// Unregisters an object from the cache void UnmarkMemory(const MapInterval& map) {
void Unregister(MapInterval& map) { if (!map->IsMemoryMarked()) {
return;
}
const std::size_t size = map->GetEnd() - map->GetStart(); const std::size_t size = map->GetEnd() - map->GetStart();
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
map->SetMemoryMarked(false);
}
/// Unregisters an object from the cache
void Unregister(const MapInterval& map) {
UnmarkMemory(map);
map->MarkAsRegistered(false); map->MarkAsRegistered(false);
if (map->IsSyncPending()) {
marked_for_unregister.remove(map);
map->SetSyncPending(false);
}
if (map->IsWritten()) { if (map->IsWritten()) {
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
} }
@ -264,6 +365,9 @@ private:
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
if (modified_inheritance) { if (modified_inheritance) {
new_map->MarkAsModified(true, GetModifiedTicks()); new_map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(new_map);
}
} }
Register(new_map, write_inheritance); Register(new_map, write_inheritance);
return new_map; return new_map;
@ -450,6 +554,13 @@ private:
return false; return false;
} }
/// Adds the map to the uncommitted flush set, creating the set on first use.
void MarkForAsyncFlush(MapInterval& map) {
    auto& pending = uncommitted_flushes;
    if (pending == nullptr) {
        pending = std::make_shared<std::unordered_set<MapInterval>>();
    }
    pending->insert(map);
}
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Core::System& system; Core::System& system;
@ -479,6 +590,10 @@ private:
u64 modified_ticks = 0; u64 modified_ticks = 0;
std::vector<u8> staging_buffer; std::vector<u8> staging_buffer;
std::list<MapInterval> marked_for_unregister;
std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
std::recursive_mutex mutex; std::recursive_mutex mutex;
}; };

@ -46,6 +46,22 @@ public:
return is_registered; return is_registered;
} }
/// Sets the memory-marked flag. The buffer cache raises it when registering the map and clears
/// it in UnmarkMemory.
void SetMemoryMarked(bool is_memory_marked_) {
is_memory_marked = is_memory_marked_;
}
/// Returns the memory-marked flag.
bool IsMemoryMarked() const {
return is_memory_marked;
}
/// Sets the sync-pending flag. The buffer cache raises it in OnCPUWrite when the map is queued
/// for unregistering.
void SetSyncPending(bool is_sync_pending_) {
is_sync_pending = is_sync_pending_;
}
/// Returns the sync-pending flag.
bool IsSyncPending() const {
return is_sync_pending;
}
VAddr GetStart() const { VAddr GetStart() const {
return start; return start;
} }
@ -83,6 +99,8 @@ private:
bool is_written{}; bool is_written{};
bool is_modified{}; bool is_modified{};
bool is_registered{}; bool is_registered{};
bool is_memory_marked{};
bool is_sync_pending{};
u64 ticks{}; u64 ticks{};
}; };

@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() { void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls); MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
// On entering GPU code, assume all memory may be touched by the ARM core. // On entering GPU code, assume all memory may be touched by the ARM core.
gpu.Maxwell3D().OnMemoryWrite(); gpu.Maxwell3D().OnMemoryWrite();
@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() {
} }
} }
gpu.FlushCommands(); gpu.FlushCommands();
gpu.SyncGuestHost();
gpu.OnCommandListEnd();
} }
bool DmaPusher::Step() { bool DmaPusher::Step() {

@ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) { switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release: case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) {
rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
}
break; break;
case Regs::QueryOperation::Acquire: case Regs::QueryOperation::Acquire:
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 increment = regs.sync_info.increment.Value(); const u32 increment = regs.sync_info.increment.Value();
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) { if (increment) {
system.GPU().IncrementSyncPoint(sync_point); rasterizer.SignalSyncPoint(sync_point);
} }
} }

@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size); write_buffer.resize(dst_size);
} }
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size); memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
}
Texture::UnswizzleSubrect( Texture::UnswizzleSubrect(
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size); write_buffer.resize(dst_size);
} }
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size); memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else { } else {

@ -0,0 +1,170 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <queue>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
class FenceBase {
public:
FenceBase(u32 payload, bool is_stubbed)
: address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
: address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
GPUVAddr GetAddress() const {
return address;
}
u32 GetPayload() const {
return payload;
}
bool IsSemaphore() const {
return is_semaphore;
}
private:
GPUVAddr address;
u32 payload;
bool is_semaphore;
protected:
bool is_stubbed;
};
/// Backend-independent fence bookkeeping. Signalled fences are kept in FIFO order; when a fence
/// is released the committed asynchronous flushes of the texture, buffer and query caches are
/// popped and the fence payload is published (semaphore write or sync point increment).
/// Backends derive from this class and implement fence creation, queueing, polling and waiting.
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
public:
    /// Queues a semaphore fence that will write `value` to `addr` once released.
    void SignalSemaphore(GPUVAddr addr, u32 value) {
        const bool should_flush = PrepareSignal();
        SubmitFence(CreateFence(addr, value, !should_flush), should_flush);
    }

    /// Queues a sync point fence that will increment sync point `value` once released.
    void SignalSyncPoint(u32 value) {
        const bool should_flush = PrepareSignal();
        SubmitFence(CreateFence(value, !should_flush), should_flush);
    }

    /// Releases every queued fence in order, waiting on the backend fence whenever one of the
    /// caches reports committed flushes that must be waited for.
    void WaitPendingFences() {
        while (!fences.empty()) {
            if (ShouldWait()) {
                WaitFence(fences.front());
            }
            ReleaseFrontFence();
        }
    }

protected:
    FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
                 TQueryCache& query_cache)
        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
          buffer_cache{buffer_cache}, query_cache{query_cache} {}

    virtual ~FenceManager() = default;

    /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
    /// true
    virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
    /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
    /// Queues a fence into the backend if the fence isn't stubbed.
    virtual void QueueFence(TFence& fence) = 0;
    /// Notifies that the backend fence has been signaled/reached in host GPU.
    virtual bool IsFenceSignaled(TFence& fence) const = 0;
    /// Waits until a fence has been signalled by the host GPU.
    virtual void WaitFence(TFence& fence) = 0;

    Core::System& system;
    VideoCore::RasterizerInterface& rasterizer;
    TTextureCache& texture_cache;
    TTBufferCache& buffer_cache;
    TQueryCache& query_cache;

private:
    /// Common prologue of the Signal* entry points: releases fences that are already done,
    /// samples whether any cache has uncommitted flushes and commits them.
    /// @return true when uncommitted flushes existed, i.e. the new fence must not be stubbed.
    bool PrepareSignal() {
        TryReleasePendingFences();
        const bool should_flush = ShouldFlush();
        CommitAsyncFlushes();
        return should_flush;
    }

    /// Common epilogue of the Signal* entry points: records and queues the new fence, flushes
    /// the rasterizer commands when required and synchronizes guest and host memory.
    void SubmitFence(TFence new_fence, bool should_flush) {
        fences.push(new_fence);
        QueueFence(new_fence);
        if (should_flush) {
            rasterizer.FlushCommands();
        }
        rasterizer.SyncGuestHost();
    }

    /// Pops the committed flushes of every cache, publishes the payload of the oldest fence and
    /// removes that fence from the queue. The queue must not be empty.
    void ReleaseFrontFence() {
        auto& gpu{system.GPU()};
        auto& memory_manager{gpu.MemoryManager()};
        TFence& current_fence = fences.front();
        PopAsyncFlushes();
        if (current_fence->IsSemaphore()) {
            memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
        } else {
            gpu.IncrementSyncPoint(current_fence->GetPayload());
        }
        fences.pop();
    }

    /// Releases queued fences in order without blocking; stops at the first fence that must be
    /// waited for and has not been signalled yet.
    void TryReleasePendingFences() {
        while (!fences.empty()) {
            if (ShouldWait() && !IsFenceSignaled(fences.front())) {
                return;
            }
            ReleaseFrontFence();
        }
    }

    bool ShouldWait() const {
        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
               query_cache.ShouldWaitAsyncFlushes();
    }

    bool ShouldFlush() const {
        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
               query_cache.HasUncommittedFlushes();
    }

    void PopAsyncFlushes() {
        texture_cache.PopAsyncFlushes();
        buffer_cache.PopAsyncFlushes();
        query_cache.PopAsyncFlushes();
    }

    void CommitAsyncFlushes() {
        texture_cache.CommitAsyncFlushes();
        buffer_cache.CommitAsyncFlushes();
        query_cache.CommitAsyncFlushes();
    }

    std::queue<TFence> fences;
};
} // namespace VideoCommon

@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true; return true;
} }
/// Appends a flush request for the given range and returns the fence id assigned to it. Ids are
/// taken from a counter that is incremented under flush_request_mutex.
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
    std::lock_guard guard{flush_request_mutex};
    last_flush_fence++;
    const u64 ticket = last_flush_fence;
    flush_requests.emplace_back(ticket, addr, size);
    return ticket;
}
void GPU::TickWork() {
std::unique_lock lck{flush_request_mutex};
while (!flush_requests.empty()) {
auto& request = flush_requests.front();
const u64 fence = request.fence;
const VAddr addr = request.addr;
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
renderer->Rasterizer().FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
}
u64 GPU::GetTicks() const { u64 GPU::GetTicks() const {
// These values were reverse engineered by fincs from NVN // These values were reverse engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds // The gpu clock is reported in units of 385/625 nanoseconds
@ -142,6 +164,13 @@ void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands(); renderer->Rasterizer().FlushCommands();
} }
/// Forwards the guest/host synchronization request to the renderer's rasterizer.
void GPU::SyncGuestHost() {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.SyncGuestHost();
}
/// Called when a command list has been fully dispatched; asks the rasterizer to release fences.
void GPU::OnCommandListEnd() {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
// So the values you see in docs might be multiplied by 4. // So the values you see in docs might be multiplied by 4.

@ -155,7 +155,23 @@ public:
/// Calls a GPU method. /// Calls a GPU method.
void CallMethod(const MethodCall& method_call); void CallMethod(const MethodCall& method_call);
/// Flush all current written commands into the host GPU for execution.
void FlushCommands(); void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
/// Signal the ending of command list.
virtual void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU.
u64 RequestFlush(VAddr addr, std::size_t size);
/// Obtains current flush request fence id.
/// The load uses acquire ordering so that a caller polling this value until its request id is
/// reached also observes the memory written by the flush that preceded the matching store.
u64 CurrentFlushRequestFence() const {
    return current_flush_fence.load(std::memory_order_acquire);
}
/// Tick pending requests within the GPU.
void TickWork();
/// Returns a reference to the Maxwell3D GPU engine. /// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D(); Engines::Maxwell3D& Maxwell3D();
@ -325,6 +341,19 @@ private:
std::condition_variable sync_cv; std::condition_variable sync_cv;
struct FlushRequest {
FlushRequest(u64 fence, VAddr addr, std::size_t size)
: fence{fence}, addr{addr}, size{size} {}
u64 fence;
VAddr addr;
std::size_t size;
};
std::list<FlushRequest> flush_requests;
std::atomic<u64> current_flush_fence{};
u64 last_flush_fence{};
std::mutex flush_request_mutex;
const bool is_async; const bool is_async;
}; };

@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle(); gpu_thread.WaitIdle();
} }
/// Asynchronous override: hands the end-of-command-list notification to the GPU thread manager
/// instead of acting on the calling thread.
void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace VideoCommon } // namespace VideoCommon

@ -32,6 +32,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override; void WaitIdle() const override;
void OnCommandListEnd() override;
protected: protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;

@ -6,6 +6,7 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "core/core.h" #include "core/core.h"
#include "core/frontend/emu_window.h" #include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/gpu_thread.h" #include "video_core/gpu_thread.h"
@ -14,8 +15,9 @@
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {
/// Runs the GPU thread /// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher, SynchState& state) { Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
MicroProfileOnThreadCreate("GpuThread"); MicroProfileOnThreadCreate("GpuThread");
// Wait for first GPU command before acquiring the window context // Wait for first GPU command before acquiring the window context
@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
dma_pusher.DispatchCalls(); dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
renderer.Rasterizer().ReleaseFences();
} else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size); renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size); renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) { } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return; return;
} else { } else {
@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher) { Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
std::ref(state)}; std::ref(context), std::ref(dma_pusher), std::ref(state)};
} }
void ThreadManager::SubmitList(Tegra::CommandList&& entries) { void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
} }
void ThreadManager::FlushRegion(VAddr addr, u64 size) { void ThreadManager::FlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
PushCommand(FlushRegionCommand(addr, size)); PushCommand(FlushRegionCommand(addr, size));
return;
}
if (!Settings::IsGPULevelExtreme()) {
return;
}
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand());
while (fence > gpu.CurrentFlushRequestFence()) {
}
}
} }
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().InvalidateRegion(addr, size); system.Renderer().Rasterizer().OnCPUWrite(addr, size);
} }
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size); system.Renderer().Rasterizer().OnCPUWrite(addr, size);
} }
void ThreadManager::WaitIdle() const { void ThreadManager::WaitIdle() const {
@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
} }
} }
/// Enqueues an OnCommandListEndCommand for the GPU thread.
void ThreadManager::OnCommandListEnd() {
    PushCommand(OnCommandListEndCommand{});
}
u64 ThreadManager::PushCommand(CommandData&& command_data) { u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence}; const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence)); state.queue.Push(CommandDataContainer(std::move(command_data), fence));

@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
u64 size; u64 size;
}; };
/// Command issued at the end of a command list; the GPU thread responds by releasing fences
/// through the rasterizer. Carries no data.
struct OnCommandListEndCommand final {};
/// Command that makes the GPU thread process its pending requests by calling GPU::TickWork().
/// Carries no data.
struct GPUTickCommand final {};
using CommandData = using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
GPUTickCommand>;
struct CommandDataContainer { struct CommandDataContainer {
CommandDataContainer() = default; CommandDataContainer() = default;
@ -122,6 +129,8 @@ public:
// Wait until the gpu thread is idle. // Wait until the gpu thread is idle.
void WaitIdle() const; void WaitIdle() const;
void OnCommandListEnd();
private: private:
/// Pushes a command to be executed by the GPU thread /// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data); u64 PushCommand(CommandData&& command_data);

@ -12,10 +12,12 @@
#include <mutex> #include <mutex>
#include <optional> #include <optional>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
#include "common/assert.h" #include "common/assert.h"
#include "core/core.h" #include "core/core.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -130,6 +132,9 @@ public:
} }
query->BindCounter(Stream(type).Current(), timestamp); query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation) {
AsyncFlushQuery(cpu_addr);
}
} }
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@ -170,6 +175,37 @@ public:
return streams[static_cast<std::size_t>(type)]; return streams[static_cast<std::size_t>(type)];
} }
/// Closes the current batch of pending query flushes and appends it to the committed queue.
/// A null entry is appended when nothing was recorded since the previous commit, so every
/// commit produces exactly one queue entry.
void CommitAsyncFlushes() {
    // Append an empty slot and swap the pending batch into it; this leaves
    // uncommitted_flushes null, ready for the next batch.
    committed_flushes.emplace_back().swap(uncommitted_flushes);
}
/// Returns true when at least one query flush has been recorded since the last commit.
bool HasUncommittedFlushes() const {
    return static_cast<bool>(uncommitted_flushes);
}
/// Returns true when the oldest committed batch exists and actually contains flushes
/// (i.e. it is not one of the null placeholder entries).
bool ShouldWaitAsyncFlushes() const {
    return !committed_flushes.empty() && committed_flushes.front() != nullptr;
}
void PopAsyncFlushes() {
if (committed_flushes.empty()) {
return;
}
auto& flush_list = committed_flushes.front();
if (!flush_list) {
committed_flushes.pop_front();
return;
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
}
committed_flushes.pop_front();
}
protected: protected:
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
@ -224,6 +260,13 @@ private:
return found != std::end(contents) ? &*found : nullptr; return found != std::end(contents) ? &*found : nullptr;
} }
/// Records a query address in the current (uncommitted) flush batch, creating the batch
/// on first use. Duplicate addresses collapse because the batch is a set.
void AsyncFlushQuery(VAddr addr) {
    auto& pending = uncommitted_flushes;
    if (pending == nullptr) {
        pending = std::make_shared<std::unordered_set<VAddr>>();
    }
    pending->insert(addr);
}
static constexpr std::uintptr_t PAGE_SIZE = 4096; static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_SHIFT = 12; static constexpr unsigned PAGE_SHIFT = 12;
@ -235,6 +278,9 @@ private:
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams; std::array<CounterStream, VideoCore::NumQueryTypes> streams;
std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
}; };
template <class QueryCache, class HostCounter> template <class QueryCache, class HostCounter>

@ -49,15 +49,33 @@ public:
/// Records a GPU query and caches it /// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
/// Release all pending fences.
virtual void ReleaseFences() = 0;
/// Notify rasterizer that all caches should be flushed to Switch memory /// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0; virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0; virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Check if the specified memory area requires flushing to CPU Memory.
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0; virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region are desynchronized with the guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
virtual void SyncGuestHost() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated /// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;

@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
} }
void OGLBufferCache::WriteBarrier() { void OGLBufferCache::WriteBarrier() {
glMemoryBarrier(GL_ALL_BARRIER_BITS); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
} }
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) { u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download); MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data); static_cast<GLsizeiptr>(size), data);
} }

@ -0,0 +1,72 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {

/// Creates a fence that carries only a payload. The GL sync object stays empty until Queue().
GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
    : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}

/// Creates a fence bound to a GPU address and payload. The GL sync object stays empty until
/// Queue().
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
    : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}

GLInnerFence::~GLInnerFence() = default;

/// Creates the underlying sync object. Stubbed fences never create one.
/// Must be called at most once per fence (asserted).
void GLInnerFence::Queue() {
    if (is_stubbed) {
        return;
    }
    ASSERT(sync_object.handle == 0);
    sync_object.Create();
}

/// Polls the sync object without blocking.
/// @returns true for stubbed fences, otherwise whether the sync status is GL_SIGNALED.
bool GLInnerFence::IsSignaled() const {
    if (is_stubbed) {
        return true;
    }
    ASSERT(sync_object.handle != 0);
    // Default to "not signaled" so a failed query never reads an indeterminate value.
    GLint sync_status = GL_UNSIGNALED;
    // bufSize is a count of integers, not a byte size: exactly one GLint is available here.
    // The written-count output is not needed, so no length pointer is passed.
    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
    return sync_status == GL_SIGNALED;
}

/// Blocks the calling thread until the sync object is signaled. No-op for stubbed fences.
void GLInnerFence::Wait() {
    if (is_stubbed) {
        return;
    }
    ASSERT(sync_object.handle != 0);
    // NOTE(review): flags == 0 does not request a flush, so this relies on the fence command
    // having already been flushed by the caller - confirm against the fence manager.
    glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}

/// Forwards all collaborators to the generic fence manager; no OpenGL-specific state is kept.
FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
                                       VideoCore::RasterizerInterface& rasterizer,
                                       TextureCacheOpenGL& texture_cache,
                                       OGLBufferCache& buffer_cache, QueryCache& query_cache)
    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}

/// Creates a payload-only fence.
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
    return std::make_shared<GLInnerFence>(value, is_stubbed);
}

/// Creates a fence associated with a GPU address and payload.
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
    return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
}

/// Queues the fence by delegating to GLInnerFence::Queue().
void FenceManagerOpenGL::QueueFence(Fence& fence) {
    fence->Queue();
}

/// Non-blocking signal check, delegating to GLInnerFence::IsSignaled().
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
    return fence->IsSignaled();
}

/// Blocking wait, delegating to GLInnerFence::Wait().
void FenceManagerOpenGL::WaitFence(Fence& fence) {
    fence->Wait();
}

} // namespace OpenGL

@ -0,0 +1,53 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
/// Fence backed by an OpenGL sync object (OGLSync). Stubbed fences never touch GL.
class GLInnerFence : public VideoCommon::FenceBase {
public:
/// Payload-only fence.
GLInnerFence(u32 payload, bool is_stubbed);
/// Fence associated with a GPU address and a payload.
GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
~GLInnerFence();
/// Creates the sync object; no-op when stubbed.
void Queue();
/// Non-blocking check; always true when stubbed.
bool IsSignaled() const;
/// Blocks until the sync object is signaled; no-op when stubbed.
void Wait();
private:
/// Sync object created by Queue(); its handle is 0 until then.
OGLSync sync_object;
};
/// Shared handle type used by the generic fence manager for OpenGL fences.
using Fence = std::shared_ptr<GLInnerFence>;
/// Generic fence manager instantiated with the OpenGL cache types.
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
/// OpenGL implementation of the fence manager hooks; each hook forwards to GLInnerFence.
class FenceManagerOpenGL final : public GenericFenceManager {
public:
FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
QueryCache& query_cache);
protected:
/// Creates a payload-only fence.
Fence CreateFence(u32 value, bool is_stubbed) override;
/// Creates a fence associated with a GPU address and payload.
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
/// Calls fence->Queue().
void QueueFence(Fence& fence) override;
/// Calls fence->IsSignaled().
bool IsFenceSignaled(Fence& fence) const override;
/// Calls fence->Wait().
void WaitFence(Fence& fence) override;
};
} // namespace OpenGL

@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
ScreenInfo& info, GLShader::ProgramManager& program_manager, ScreenInfo& info, GLShader::ProgramManager& program_manager,
StateTracker& state_tracker) StateTracker& state_tracker)
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions(); CheckExtensions();
} }
@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
EndTransformFeedback(); EndTransformFeedback();
++num_queued_commands; ++num_queued_commands;
system.GPU().TickWork();
} }
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size);
} }
/// Reports whether the given region has cached data that must be flushed to guest memory.
/// The texture cache is only consulted when GPU accuracy is High or Extreme; the buffer
/// cache is consulted whenever the texture cache did not already answer "yes".
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
    if (Settings::IsGPULevelHigh() && texture_cache.MustFlushRegion(addr, size)) {
        return true;
    }
    return buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) { if (addr == 0 || size == 0) {
@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }
/// Notifies the caches that the CPU wrote to [addr, addr + size). The texture and buffer
/// caches receive OnCPUWrite, while the shader and query caches are invalidated directly.
/// Null addresses and empty ranges are ignored.
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    const bool has_region = addr != 0 && size != 0;
    if (has_region) {
        texture_cache.OnCPUWrite(addr, size);
        shader_cache.InvalidateRegion(addr, size);
        buffer_cache.OnCPUWrite(addr, size);
        query_cache.InvalidateRegion(addr, size);
    }
}
/// Synchronizes guest and host memory by calling SyncGuestHost on the texture cache and then
/// on the buffer cache, inside the OpenGL_CacheManagement profiling scope.
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
}
/// Signals a semaphore at the given GPU address. With an asynchronous GPU the request goes
/// through the fence manager; otherwise the value is written to GPU memory immediately.
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSemaphore(addr, value);
        return;
    }
    gpu.MemoryManager().Write<u32>(addr, value);
}
/// Signals a syncpoint. With an asynchronous GPU the request goes through the fence manager;
/// otherwise the syncpoint is incremented on the GPU right away.
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSyncPoint(value);
    } else {
        gpu.IncrementSyncPoint(value);
    }
}
/// Waits on all pending fences. Only meaningful with an asynchronous GPU; otherwise a no-op.
void RasterizerOpenGL::ReleaseFences() {
    if (system.GPU().IsAsync()) {
        fence_manager.WaitPendingFences();
    }
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size); FlushRegion(addr, size);
} }
InvalidateRegion(addr, size); InvalidateRegion(addr, size);

@ -23,6 +23,7 @@
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
@ -66,7 +67,13 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override; void FlushCommands() override;
void TickFrame() override; void TickFrame() override;
@ -222,6 +229,8 @@ private:
SamplerCacheOpenGL sampler_cache; SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache; FramebufferCacheOpenGL framebuffer_cache;
QueryCache query_cache; QueryCache query_cache;
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
Core::System& system; Core::System& system;
ScreenInfo& screen_info; ScreenInfo& screen_info;
@ -229,7 +238,6 @@ private:
StateTracker& state_tracker; StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
GLint vertex_binding = 0; GLint vertex_binding = 0;

@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// Look up shader in the cache based on address // Look up shader in the cache based on address
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
if (shader) { if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader; return last_shaders[static_cast<std::size_t>(program)] = shader;
} }
@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const std::size_t size_in_bytes = code.size() * sizeof(u64); const std::size_t size_in_bytes = code.size() * sizeof(u64);
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
} }
if (cpu_addr) {
Register(shader); Register(shader);
} else {
null_shader = shader;
}
return last_shaders[static_cast<std::size_t>(program)] = shader; return last_shaders[static_cast<std::size_t>(program)] = shader;
} }
@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()}; auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
if (kernel) { if (kernel) {
return kernel; return kernel;
} }
@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
} }
if (cpu_addr) {
Register(kernel); Register(kernel);
} else {
null_kernel = kernel;
}
return kernel; return kernel;
} }

@ -125,6 +125,9 @@ private:
ShaderDiskCacheOpenGL disk_cache; ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache; std::unordered_map<u64, PrecompiledShader> runtime_cache;
Shader null_shader{};
Shader null_kernel{};
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
}; };

@ -0,0 +1,101 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <thread>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
u32 payload, bool is_stubbed)
: VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
InnerFence::~InnerFence() = default;
void InnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(!event);
event = device.GetLogical().CreateEvent();
ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
});
}
bool InnerFence::IsSignaled() const {
if (is_stubbed) {
return true;
}
ASSERT(event);
return IsEventSignalled();
}
void InnerFence::Wait() {
if (is_stubbed) {
return;
}
ASSERT(event);
if (ticks >= scheduler.Ticks()) {
scheduler.Flush();
}
while (!IsEventSignalled()) {
std::this_thread::yield();
}
}
bool InnerFence::IsEventSignalled() const {
switch (const VkResult result = event.GetStatus()) {
case VK_EVENT_SET:
return true;
case VK_EVENT_RESET:
return false;
default:
throw vk::Exception(result);
}
}
VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
VKQueryCache& query_cache)
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
device{device}, scheduler{scheduler} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
}
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
}
void VKFenceManager::QueueFence(Fence& fence) {
fence->Queue();
}
bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
return fence->IsSignaled();
}
void VKFenceManager::WaitFence(Fence& fence) {
fence->Wait();
}
} // namespace Vulkan

@ -0,0 +1,74 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
// Forward declarations: these types are only used by reference in this header.
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class VKBufferCache;
class VKDevice;
class VKQueryCache;
class VKScheduler;
class VKTextureCache;
/// Fence backed by a Vulkan event (vk::Event). Stubbed fences never create an event.
class InnerFence : public VideoCommon::FenceBase {
public:
/// Payload-only fence.
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
bool is_stubbed);
/// Fence associated with a GPU address and a payload.
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
u32 payload, bool is_stubbed);
~InnerFence();
/// Creates the event and records a command that sets it; no-op when stubbed.
void Queue();
/// Non-blocking check; always true when stubbed.
bool IsSignaled() const;
/// Blocks until the event is set; no-op when stubbed.
void Wait();
private:
/// Returns true when the event status is VK_EVENT_SET, false for VK_EVENT_RESET, and throws
/// vk::Exception for any other result.
bool IsEventSignalled() const;
const VKDevice& device;
VKScheduler& scheduler;
/// Event created by Queue(); empty until then.
vk::Event event;
/// Scheduler tick captured when the fence was queued.
u64 ticks = 0;
};
/// Shared handle type used by the generic fence manager for Vulkan fences.
using Fence = std::shared_ptr<InnerFence>;
/// Generic fence manager instantiated with the Vulkan cache types.
using GenericFenceManager =
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
/// Vulkan implementation of the fence manager hooks; each hook forwards to InnerFence.
class VKFenceManager final : public GenericFenceManager {
public:
explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
VKQueryCache& query_cache);
protected:
/// Creates a payload-only fence.
Fence CreateFence(u32 value, bool is_stubbed) override;
/// Creates a fence associated with a GPU address and payload.
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
/// Calls fence->Queue().
void QueueFence(Fence& fence) override;
/// Calls fence->IsSignaled().
bool IsFenceSignaled(Fence& fence) const override;
/// Calls fence->Wait().
void WaitFence(Fence& fence) override;
private:
/// Device and scheduler handed to every fence this manager creates.
const VKDevice& device;
VKScheduler& scheduler;
};
} // namespace Vulkan

@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)}; const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
if (!shader) { if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)}; const auto host_ptr{memory_manager.GetPointer(program_addr)};
@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
std::move(code), stage_offset); std::move(code), stage_offset);
if (cpu_addr) {
Register(shader); Register(shader);
} else {
null_shader = shader;
}
} }
shaders[index] = std::move(shader); shaders[index] = std::move(shader);
} }
@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
if (!shader) { if (!shader) {
// No shader found - create a new one // No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr); const auto host_ptr = memory_manager.GetPointer(program_addr);
@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, std::move(code), program_addr, *cpu_addr, std::move(code),
kernel_main_offset); kernel_main_offset);
if (cpu_addr) {
Register(shader); Register(shader);
} else {
null_kernel = shader;
}
} }
Specialization specialization; Specialization specialization;

@ -182,6 +182,9 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue; VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache; VKRenderPassCache& renderpass_cache;
Shader null_shader{};
Shader null_kernel{};
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key; GraphicsPipelineCacheKey last_graphics_key;

@ -17,6 +17,7 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
renderpass_cache), renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) { sampler_cache(device),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache); scheduler.SetQueryCache(query_cache);
} }
@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}); });
EndTransformFeedback(); EndTransformFeedback();
system.GPU().TickWork();
} }
void RasterizerVulkan::Clear() { void RasterizerVulkan::Clear() {
@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size);
} }
/// Reports whether the given region has cached data that must be flushed to guest memory.
/// The texture cache is only consulted when GPU accuracy is High or Extreme; the buffer
/// cache is consulted whenever the texture cache did not already answer "yes".
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
    if (Settings::IsGPULevelHigh() && texture_cache.MustFlushRegion(addr, size)) {
        return true;
    }
    return buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) { if (addr == 0 || size == 0) {
return; return;
@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }
/// Notifies the caches that the CPU wrote to [addr, addr + size). The texture and buffer
/// caches receive OnCPUWrite, while the pipeline and query caches are invalidated directly.
/// Null addresses and empty ranges are ignored.
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
    const bool has_region = addr != 0 && size != 0;
    if (has_region) {
        texture_cache.OnCPUWrite(addr, size);
        pipeline_cache.InvalidateRegion(addr, size);
        buffer_cache.OnCPUWrite(addr, size);
        query_cache.InvalidateRegion(addr, size);
    }
}
/// Synchronizes guest and host memory by calling SyncGuestHost on the texture cache and then
/// on the buffer cache.
void RasterizerVulkan::SyncGuestHost() {
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
}
/// Signals a semaphore at the given GPU address. With an asynchronous GPU the request goes
/// through the fence manager; otherwise the value is written to GPU memory immediately.
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSemaphore(addr, value);
    } else {
        gpu.MemoryManager().Write<u32>(addr, value);
    }
}
/// Signals a syncpoint. With an asynchronous GPU the request goes through the fence manager;
/// otherwise the syncpoint is incremented on the GPU right away.
void RasterizerVulkan::SignalSyncPoint(u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSyncPoint(value);
    } else {
        gpu.IncrementSyncPoint(value);
    }
}
/// Waits on all pending fences. Only meaningful with an asynchronous GPU; otherwise a no-op.
void RasterizerVulkan::ReleaseFences() {
    if (system.GPU().IsAsync()) {
        fence_manager.WaitPendingFences();
    }
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size); FlushRegion(addr, size);
InvalidateRegion(addr, size); InvalidateRegion(addr, size);

@ -21,6 +21,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h"
@ -118,7 +119,13 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override; void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override; void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override; void FlushCommands() override;
void TickFrame() override; void TickFrame() override;
@ -261,6 +268,7 @@ private:
VKPipelineCache pipeline_cache; VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache; VKBufferCache buffer_cache;
VKSamplerCache sampler_cache; VKSamplerCache sampler_cache;
VKFenceManager fence_manager;
VKQueryCache query_cache; VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments; std::array<View, Maxwell::NumRenderTargets> color_attachments;

@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetBlendConstants); X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias); X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds); X(vkCmdSetDepthBounds);
X(vkCmdSetEvent);
X(vkCmdSetScissor); X(vkCmdSetScissor);
X(vkCmdSetStencilCompareMask); X(vkCmdSetStencilCompareMask);
X(vkCmdSetStencilReference); X(vkCmdSetStencilReference);
@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCreateDescriptorPool); X(vkCreateDescriptorPool);
X(vkCreateDescriptorSetLayout); X(vkCreateDescriptorSetLayout);
X(vkCreateDescriptorUpdateTemplateKHR); X(vkCreateDescriptorUpdateTemplateKHR);
X(vkCreateEvent);
X(vkCreateFence); X(vkCreateFence);
X(vkCreateFramebuffer); X(vkCreateFramebuffer);
X(vkCreateGraphicsPipelines); X(vkCreateGraphicsPipelines);
@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkDestroyDescriptorPool); X(vkDestroyDescriptorPool);
X(vkDestroyDescriptorSetLayout); X(vkDestroyDescriptorSetLayout);
X(vkDestroyDescriptorUpdateTemplateKHR); X(vkDestroyDescriptorUpdateTemplateKHR);
X(vkDestroyEvent);
X(vkDestroyFence); X(vkDestroyFence);
X(vkDestroyFramebuffer); X(vkDestroyFramebuffer);
X(vkDestroyImage); X(vkDestroyImage);
@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkFreeMemory); X(vkFreeMemory);
X(vkGetBufferMemoryRequirements); X(vkGetBufferMemoryRequirements);
X(vkGetDeviceQueue); X(vkGetDeviceQueue);
X(vkGetEventStatus);
X(vkGetFenceStatus); X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements); X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults); X(vkGetQueryPoolResults);
@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
dld.vkFreeMemory(device, handle, nullptr); dld.vkFreeMemory(device, handle, nullptr);
} }
/// Destroys a VkEvent through the device dispatch table, passing nullptr as the allocator.
void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyEvent(device, handle, nullptr);
}
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyFence(device, handle, nullptr); dld.vkDestroyFence(device, handle, nullptr);
} }
@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld); return ShaderModule(object, handle, *dld);
} }
/// Creates a new VkEvent on this device and returns it wrapped in an Event.
/// The create info carries no extension chain and no flags. The VkResult of
/// vkCreateEvent is handed to Check() (presumably reports failures - confirm
/// against its definition).
Event Device::CreateEvent() const {
    const VkEventCreateInfo create_info{
        VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, // sType
        nullptr,                             // pNext
        0,                                   // flags
    };

    VkEvent raw_event;
    Check(dld->vkCreateEvent(handle, &create_info, nullptr, &raw_event));
    return Event(raw_event, handle, *dld);
}
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
VkSwapchainKHR object; VkSwapchainKHR object;
Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));

@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
PFN_vkCmdSetDepthBias vkCmdSetDepthBias; PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
PFN_vkCmdSetEvent vkCmdSetEvent;
PFN_vkCmdSetScissor vkCmdSetScissor; PFN_vkCmdSetScissor vkCmdSetScissor;
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
PFN_vkCmdSetStencilReference vkCmdSetStencilReference; PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCreateDescriptorPool vkCreateDescriptorPool; PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
PFN_vkCreateEvent vkCreateEvent;
PFN_vkCreateFence vkCreateFence; PFN_vkCreateFence vkCreateFence;
PFN_vkCreateFramebuffer vkCreateFramebuffer; PFN_vkCreateFramebuffer vkCreateFramebuffer;
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
PFN_vkDestroyEvent vkDestroyEvent;
PFN_vkDestroyFence vkDestroyFence; PFN_vkDestroyFence vkDestroyFence;
PFN_vkDestroyFramebuffer vkDestroyFramebuffer; PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
PFN_vkDestroyImage vkDestroyImage; PFN_vkDestroyImage vkDestroyImage;
@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkFreeMemory vkFreeMemory; PFN_vkFreeMemory vkFreeMemory;
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
PFN_vkGetDeviceQueue vkGetDeviceQueue; PFN_vkGetDeviceQueue vkGetDeviceQueue;
PFN_vkGetEventStatus vkGetEventStatus;
PFN_vkGetFenceStatus vkGetFenceStatus; PFN_vkGetFenceStatus vkGetFenceStatus;
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
PFN_vkGetQueryPoolResults vkGetQueryPoolResults; PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@ -648,6 +653,15 @@ public:
std::vector<VkImage> GetImages() const; std::vector<VkImage> GetImages() const;
}; };
/// Wrapper around a VkEvent whose owner is a VkDevice.
/// NOTE(review): lifetime handling comes from the Handle base, which presumably
/// releases the event through the Destroy(VkDevice, VkEvent, ...) overload - confirm.
class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
// Reuse the base class constructors unchanged.
using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
public:
/// Returns the raw VkResult of vkGetEventStatus for this event; the result is
/// not checked here, so interpreting it is left to the caller.
VkResult GetStatus() const noexcept {
return dld->vkGetEventStatus(owner, handle);
}
};
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
@ -695,6 +709,8 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
Event CreateEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@ -938,6 +954,10 @@ public:
dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
} }
/// Forwards to vkCmdSetEvent on this wrapper's handle.
/// @param event       Event passed through to vkCmdSetEvent.
/// @param stage_flags Pipeline stage mask passed through to vkCmdSetEvent.
void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
dld->vkCmdSetEvent(handle, event, stage_flags);
}
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets, const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept { const VkDeviceSize* sizes) const noexcept {

@ -192,6 +192,22 @@ public:
index = index_; index = index_;
} }
/// Stores the given value in the surface's memory-marked flag.
void SetMemoryMarked(bool is_memory_marked_) {
is_memory_marked = is_memory_marked_;
}
/// Returns the current value of the memory-marked flag.
bool IsMemoryMarked() const {
return is_memory_marked;
}
/// Stores the given value in the surface's sync-pending flag.
void SetSyncPending(bool is_sync_pending_) {
is_sync_pending = is_sync_pending_;
}
/// Returns the current value of the sync-pending flag.
bool IsSyncPending() const {
return is_sync_pending;
}
void MarkAsPicked(bool is_picked_) { void MarkAsPicked(bool is_picked_) {
is_picked = is_picked_; is_picked = is_picked_;
} }
@ -303,6 +319,8 @@ private:
bool is_target{}; bool is_target{};
bool is_registered{}; bool is_registered{};
bool is_picked{}; bool is_picked{};
// Accessed through SetMemoryMarked()/IsMemoryMarked(); starts out false.
bool is_memory_marked{};
// Accessed through SetSyncPending()/IsSyncPending(); starts out false.
bool is_sync_pending{};
u32 index{NO_RT}; u32 index{NO_RT};
u64 modification_tick{}; u64 modification_tick{};
}; };

@ -6,6 +6,7 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <list>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <set> #include <set>
@ -62,6 +63,30 @@ public:
} }
} }
/// Handles a write notification covering `size` bytes starting at `addr`.
/// Every surface found in that region which is still memory-marked is
/// unmarked, flagged as sync-pending and queued in marked_for_unregister so
/// that SyncGuestHost() can unregister it later.
void OnCPUWrite(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const auto& overlap : GetSurfacesInRegion(addr, size)) {
        if (!overlap->IsMemoryMarked()) {
            // Nothing is tracked for this surface, leave it untouched.
            continue;
        }
        UnmarkMemory(overlap);
        overlap->SetSyncPending(true);
        marked_for_unregister.emplace_back(overlap);
    }
}
/// Unregisters every still-registered surface queued in
/// marked_for_unregister, then empties the queue.
void SyncGuestHost() {
    std::lock_guard lock{mutex};

    for (const auto& pending : marked_for_unregister) {
        if (!pending->IsRegistered()) {
            continue;
        }
        // Clear the flag before calling Unregister(): Unregister() removes
        // sync-pending surfaces from marked_for_unregister, which must not
        // happen while this loop is iterating over that same list.
        pending->SetSyncPending(false);
        Unregister(pending);
    }
    marked_for_unregister.clear();
}
/** /**
* Guarantees that rendertargets don't unregister themselves if the * Guarantees that rendertargets don't unregister themselves if the
* collide. Protection is currently only done on 3D slices. * collide. Protection is currently only done on 3D slices.
@ -85,10 +110,20 @@ public:
return a->GetModificationTick() < b->GetModificationTick(); return a->GetModificationTick() < b->GetModificationTick();
}); });
for (const auto& surface : surfaces) { for (const auto& surface : surfaces) {
mutex.unlock();
FlushSurface(surface); FlushSurface(surface);
mutex.lock();
} }
} }
/// Reports whether any surface found in the region of `size` bytes starting at
/// `addr` is flagged as modified.
/// @return true as soon as one modified surface is found, false otherwise
///         (including when the region contains no surfaces).
bool MustFlushRegion(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const auto& candidate : GetSurfacesInRegion(addr, size)) {
        if (candidate->IsModified()) {
            return true;
        }
    }
    return false;
}
TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry) { const VideoCommon::Shader::Sampler& entry) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
@ -206,8 +241,14 @@ public:
auto surface_view = GetSurface(gpu_addr, *cpu_addr, auto surface_view = GetSurface(gpu_addr, *cpu_addr,
SurfaceParams::CreateForFramebuffer(system, index), true); SurfaceParams::CreateForFramebuffer(system, index), true);
if (render_targets[index].target) if (render_targets[index].target) {
render_targets[index].target->MarkAsRenderTarget(false, NO_RT); auto& surface = render_targets[index].target;
surface->MarkAsRenderTarget(false, NO_RT);
const auto& cr_params = surface->GetSurfaceParams();
if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
AsyncFlushSurface(surface);
}
}
render_targets[index].target = surface_view.first; render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second; render_targets[index].view = surface_view.second;
if (render_targets[index].target) if (render_targets[index].target)
@ -284,6 +325,34 @@ public:
return ++ticks; return ++ticks;
} }
/// Moves the current batch of uncommitted flushes (possibly null, when nothing
/// was queued) to the back of committed_flushes and leaves
/// uncommitted_flushes empty so a new batch can be started.
void CommitAsyncFlushes() {
    // Append an empty slot, then swap the pending batch into it; afterwards
    // uncommitted_flushes holds the slot's former (null) value.
    committed_flushes.emplace_back();
    committed_flushes.back().swap(uncommitted_flushes);
}
/// Returns true when a batch of flushes has been started (by
/// AsyncFlushSurface) but not yet committed.
bool HasUncommittedFlushes() const {
    return static_cast<bool>(uncommitted_flushes);
}
/// Returns true when the oldest committed batch exists and actually contains
/// a flush list (i.e. it is not a null placeholder).
bool ShouldWaitAsyncFlushes() const {
    if (committed_flushes.empty()) {
        return false;
    }
    return committed_flushes.front() != nullptr;
}
void PopAsyncFlushes() {
if (committed_flushes.empty()) {
return;
}
auto& flush_list = committed_flushes.front();
if (!flush_list) {
committed_flushes.pop_front();
return;
}
for (TSurface& surface : *flush_list) {
FlushSurface(surface);
}
committed_flushes.pop_front();
}
protected: protected:
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
bool is_astc_supported) bool is_astc_supported)
@ -345,9 +414,20 @@ protected:
surface->SetCpuAddr(*cpu_addr); surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface); RegisterInnerCache(surface);
surface->MarkAsRegistered(true); surface->MarkAsRegistered(true);
surface->SetMemoryMarked(true);
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
} }
/// Decrements the rasterizer's cached-page count for the surface's memory
/// range and clears its memory-marked flag. Surfaces that are not currently
/// marked are left untouched, so calling this more than once is harmless.
void UnmarkMemory(TSurface surface) {
    if (surface->IsMemoryMarked()) {
        const std::size_t byte_count = surface->GetSizeInBytes();
        const VAddr base_addr = surface->GetCpuAddr();
        rasterizer.UpdatePagesCachedCount(base_addr, byte_count, -1);
        surface->SetMemoryMarked(false);
    }
}
void Unregister(TSurface surface) { void Unregister(TSurface surface) {
if (guard_render_targets && surface->IsProtected()) { if (guard_render_targets && surface->IsProtected()) {
return; return;
@ -355,9 +435,11 @@ protected:
if (!guard_render_targets && surface->IsRenderTarget()) { if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface); ManageRenderTargetUnregister(surface);
} }
const std::size_t size = surface->GetSizeInBytes(); UnmarkMemory(surface);
const VAddr cpu_addr = surface->GetCpuAddr(); if (surface->IsSyncPending()) {
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); marked_for_unregister.remove(surface);
surface->SetSyncPending(false);
}
UnregisterInnerCache(surface); UnregisterInnerCache(surface);
surface->MarkAsRegistered(false); surface->MarkAsRegistered(false);
ReserveSurface(surface->GetSurfaceParams(), surface); ReserveSurface(surface->GetSurfaceParams(), surface);
@ -417,7 +499,7 @@ private:
**/ **/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::IsGPULevelExtreme()) {
return RecycleStrategy::Flush; return RecycleStrategy::Flush;
} }
// 3D Textures decision // 3D Textures decision
@ -461,7 +543,7 @@ private:
} }
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: { case RecycleStrategy::Ignore: {
return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
} }
case RecycleStrategy::Flush: { case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(), std::sort(overlaps.begin(), overlaps.end(),
@ -509,7 +591,7 @@ private:
} }
const auto& final_params = new_surface->GetSurfaceParams(); const auto& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type) { if (cr_params.type != final_params.type) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::IsGPULevelExtreme()) {
BufferCopy(current_surface, new_surface); BufferCopy(current_surface, new_surface);
} }
} else { } else {
@ -598,7 +680,7 @@ private:
if (passed_tests == 0) { if (passed_tests == 0) {
return {}; return {};
// In Accurate GPU all tests should pass, else we recycle // In Accurate GPU all tests should pass, else we recycle
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
return {}; return {};
} }
for (const auto& surface : overlaps) { for (const auto& surface : overlaps) {
@ -668,7 +750,7 @@ private:
for (const auto& surface : overlaps) { for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) { if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
if (Settings::values.use_accurate_gpu_emulation) { if (Settings::IsGPULevelExtreme()) {
return std::nullopt; return std::nullopt;
} }
Unregister(surface); Unregister(surface);
@ -1106,6 +1188,13 @@ private:
TView view; TView view;
}; };
/// Queues `surface` in the current uncommitted flush batch, creating the
/// batch on first use.
void AsyncFlushSurface(TSurface& surface) {
    if (uncommitted_flushes == nullptr) {
        // The list is allocated lazily, only once there is something to flush.
        uncommitted_flushes = std::make_shared<std::list<TSurface>>();
    }
    uncommitted_flushes->push_back(surface);
}
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
FormatLookupTable format_lookup_table; FormatLookupTable format_lookup_table;
@ -1150,6 +1239,11 @@ private:
std::unordered_map<u32, TSurface> invalid_cache; std::unordered_map<u32, TSurface> invalid_cache;
std::vector<u8> invalid_memory; std::vector<u8> invalid_memory;
// Surfaces queued by OnCPUWrite() and unregistered by SyncGuestHost().
std::list<TSurface> marked_for_unregister;
// Flush batch currently being built by AsyncFlushSurface(); null until the
// first surface is queued and again after CommitAsyncFlushes().
std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
// Batches handed over by CommitAsyncFlushes(), consumed front-first by
// PopAsyncFlushes(). Entries may be null when a commit had nothing queued.
std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
StagingCache staging_cache; StagingCache staging_cache;
std::recursive_mutex mutex; std::recursive_mutex mutex;
}; };

@ -639,8 +639,8 @@ void Config::ReadRendererValues() {
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
Settings::values.use_disk_shader_cache = Settings::values.use_disk_shader_cache =
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
Settings::values.use_accurate_gpu_emulation = const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation =
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
@ -1080,8 +1080,8 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
true); true);
WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
Settings::values.use_accurate_gpu_emulation, false); 0);
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, false); Settings::values.use_asynchronous_gpu_emulation, false);
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);

@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() { void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
ui->use_vsync->setEnabled(runtime_lock); ui->use_vsync->setEnabled(runtime_lock);
ui->use_vsync->setChecked(Settings::values.use_vsync); ui->use_vsync->setChecked(Settings::values.use_vsync);
ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setEnabled(runtime_lock);
@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
} }
void ConfigureGraphicsAdvanced::ApplyConfiguration() { void ConfigureGraphicsAdvanced::ApplyConfiguration() {
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
Settings::values.gpu_accuracy = gpu_accuracy;
Settings::values.use_vsync = ui->use_vsync->isChecked(); Settings::values.use_vsync = ui->use_vsync->isChecked();
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();

@ -23,12 +23,35 @@
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_3"> <layout class="QVBoxLayout" name="verticalLayout_3">
<item> <item>
<widget class="QCheckBox" name="use_accurate_gpu_emulation"> <layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLabel" name="label_gpu_accuracy">
<property name="text"> <property name="text">
<string>Use accurate GPU emulation (slow)</string> <string>Accuracy Level:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QComboBox" name="gpu_accuracy">
<item>
<property name="text">
<string notr="true">Normal</string>
</property>
</item>
<item>
<property name="text">
<string notr="true">High</string>
</property>
</item>
<item>
<property name="text">
<string notr="true">Extreme(very slow)</string>
</property>
</item>
</widget>
</item>
</layout>
</item>
<item> <item>
<widget class="QCheckBox" name="use_vsync"> <widget class="QCheckBox" name="use_vsync">
<property name="toolTip"> <property name="toolTip">

@ -388,8 +388,8 @@ void Config::ReadValues() {
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
Settings::values.use_disk_shader_cache = Settings::values.use_disk_shader_cache =
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
Settings::values.use_accurate_gpu_emulation = const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
Settings::values.use_vsync = Settings::values.use_vsync =

@ -146,9 +146,9 @@ frame_limit =
# 0 (default): Off, 1 : On # 0 (default): Off, 1 : On
use_disk_shader_cache = use_disk_shader_cache =
# Whether to use accurate GPU emulation # Which gpu accuracy level to use
# 0 (default): Off (fast), 1 : On (slow) # 0 (Normal), 1 (High), 2 (Extreme)
use_accurate_gpu_emulation = gpu_accuracy =
# Whether to use asynchronous GPU emulation # Whether to use asynchronous GPU emulation
# 0 : Off (slow), 1 (default): On (fast) # 0 : Off (slow), 1 (default): On (fast)

@ -126,8 +126,8 @@ void Config::ReadValues() {
Settings::values.frame_limit = 100; Settings::values.frame_limit = 100;
Settings::values.use_disk_shader_cache = Settings::values.use_disk_shader_cache =
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
Settings::values.use_accurate_gpu_emulation = const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);