RasterizerMemory: Add filtering for flushing/invalidation operations.

master
Fernando Sahmkow 2022-11-20 03:07:14 +07:00
parent 2793304117
commit 3630bfaef3
14 changed files with 189 additions and 96 deletions

@@ -13,6 +13,7 @@ add_library(video_core STATIC
     buffer_cache/buffer_base.h
     buffer_cache/buffer_cache.cpp
     buffer_cache/buffer_cache.h
+    cache_types.h
     cdma_pusher.cpp
     cdma_pusher.h
     compatible_formats.cpp

@@ -208,7 +208,7 @@ public:
     [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
 
-    std::mutex mutex;
+    std::recursive_mutex mutex;
     Runtime& runtime;
 
 private:

@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+enum class CacheType : u32 {
+    None = 0,
+    TextureCache = 1 << 0,
+    QueryCache = 1 << 1,
+    BufferCache = 1 << 2,
+    ShaderCache = 1 << 3,
+    NoTextureCache = QueryCache | BufferCache | ShaderCache,
+    NoBufferCache = TextureCache | QueryCache | ShaderCache,
+    NoQueryCache = TextureCache | BufferCache | ShaderCache,
+    All = TextureCache | QueryCache | BufferCache | ShaderCache,
+};
+DECLARE_ENUM_FLAG_OPERATORS(CacheType)
+
+} // namespace VideoCommon
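
CacheType is a plain bit mask: DECLARE_ENUM_FLAG_OPERATORS (from common/common_funcs.h) generates the bitwise operators for it, and callers test individual bits the same way the rasterizer changes further down do. A minimal usage sketch follows; the helper name is hypothetical and not part of this commit:

    #include "video_core/cache_types.h"

    using VideoCommon::CacheType;

    // Hypothetical helper showing how a filter mask is combined and queried.
    void TouchCaches(CacheType which) {
        // NoTextureCache == QueryCache | BufferCache | ShaderCache, so passing it
        // skips only the texture cache.
        if (bool(which & CacheType::BufferCache)) {
            // ... operate on the buffer cache for this region
        }
        if (bool(which & CacheType::QueryCache)) {
            // ... operate on the query cache for this region
        }
    }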

@@ -356,8 +356,8 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
 }
 
 template <bool is_safe>
-void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
-                                  std::size_t size) const {
+void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
+                                  [[maybe_unused]] VideoCommon::CacheType which) const {
     auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
                            [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
         std::memset(dest_buffer, 0, copy_amount);
@@ -367,7 +367,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
         if constexpr (is_safe) {
-            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
         }
         u8* physical = memory.GetPointer(cpu_addr_base);
         std::memcpy(dest_buffer, physical, copy_amount);
@@ -377,7 +377,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
         if constexpr (is_safe) {
-            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
         }
         if (!IsBigPageContinous(page_index)) [[unlikely]] {
             memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
@@ -395,18 +395,19 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
     MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
 }
 
-void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
-    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
+                              VideoCommon::CacheType which) const {
+    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
 }
 
 void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
-    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
+    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
 }
 
 template <bool is_safe>
-void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
-                                   std::size_t size) {
+void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
+                                   [[maybe_unused]] VideoCommon::CacheType which) {
     auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
                             [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -415,7 +416,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
         if constexpr (is_safe) {
-            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
         }
         u8* physical = memory.GetPointer(cpu_addr_base);
         std::memcpy(physical, src_buffer, copy_amount);
@@ -425,7 +426,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
         if constexpr (is_safe) {
-            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
         }
         if (!IsBigPageContinous(page_index)) [[unlikely]] {
             memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
@@ -443,16 +444,18 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
     MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
 }
 
-void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
-    WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
+                               VideoCommon::CacheType which) {
+    WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size, which);
 }
 
 void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                      std::size_t size) {
-    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
+    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
 }
 
-void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
+void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
+                                VideoCommon::CacheType which) const {
     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
                           [[maybe_unused]] std::size_t offset,
                           [[maybe_unused]] std::size_t copy_amount) {};
@@ -460,12 +463,12 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
-        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
     };
     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
-        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
     };
     auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
                                  std::size_t copy_amount) {
@@ -475,7 +478,8 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
     MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
 }
 
-bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
+bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
+                                  VideoCommon::CacheType which) const {
     bool result = false;
     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
                           [[maybe_unused]] std::size_t offset,
@@ -484,13 +488,13 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const {
     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
-        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which);
         return result;
     };
     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
-        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount);
+        result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which);
         return result;
     };
     auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -547,7 +551,8 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) co
     return kind_map.GetContinousSizeFrom(gpu_addr);
 }
 
-void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
+void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
+                                     VideoCommon::CacheType which) const {
     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
                           [[maybe_unused]] std::size_t offset,
                           [[maybe_unused]] std::size_t copy_amount) {};
@@ -555,12 +560,12 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
-        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
     };
     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
         const VAddr cpu_addr_base =
             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
-        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which);
     };
     auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
                                       std::size_t copy_amount) {
@@ -570,14 +575,15 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
     MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages);
 }
 
-void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
+                              VideoCommon::CacheType which) {
     std::vector<u8> tmp_buffer(size);
-    ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+    ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
 
     // The output block must be flushed in case it has data modified from the GPU.
     // Fixes NPC geometry in Zombie Panic in Wonderland DX
-    FlushRegion(gpu_dest_addr, size);
-    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
+    FlushRegion(gpu_dest_addr, size, which);
+    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which);
 }
 
 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {

@@ -12,6 +12,7 @@
 #include "common/multi_level_page_table.h"
 #include "common/range_map.h"
 #include "common/virtual_buffer.h"
+#include "video_core/cache_types.h"
 #include "video_core/pte_kind.h"
 
 namespace VideoCore {
@@ -60,9 +61,12 @@ public:
      * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
      * Flushes and Invalidations, respectively to each operation.
      */
-    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
+    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
+                   VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
+    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
+                    VideoCommon::CacheType which = VideoCommon::CacheType::All);
+    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
+                   VideoCommon::CacheType which = VideoCommon::CacheType::All);
 
     /**
      * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -105,11 +109,14 @@ public:
     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
-    void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+    void FlushRegion(GPUVAddr gpu_addr, size_t size,
+                     VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
 
-    void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const;
+    void InvalidateRegion(GPUVAddr gpu_addr, size_t size,
+                          VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
 
-    bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const;
+    bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
+                       VideoCommon::CacheType which = VideoCommon::CacheType::All) const;
 
     size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const;
@@ -128,10 +135,12 @@ private:
                           FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
 
     template <bool is_safe>
-    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
+                       VideoCommon::CacheType which) const;
 
     template <bool is_safe>
-    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
+                        VideoCommon::CacheType which);
 
     template <bool is_big_page>
     [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
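
Because every new which parameter defaults to VideoCommon::CacheType::All, existing call sites compile unchanged and keep their old behavior; only callers that want to skip a cache pass a filter explicitly. A rough sketch of both call styles (the helper name and buffer are illustrative, not from the commit):

    #include <span>

    #include "video_core/memory_manager.h"

    // Illustrative only: read back a GPU range while leaving the texture cache
    // untouched, mirroring the UploadImageContents change later in this commit.
    void ReadSkippingTextureCache(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                                  std::span<u8> dest) {
        // Default argument: identical to the pre-commit behavior (CacheType::All).
        gpu_memory.FlushRegion(gpu_addr, dest.size_bytes());

        // Explicit filter: buffer, query and shader caches are still honored for the
        // region, but the texture cache is never asked to download anything.
        gpu_memory.ReadBlock(gpu_addr, dest.data(), dest.size_bytes(),
                             VideoCommon::CacheType::NoTextureCache);
    }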

@@ -8,6 +8,7 @@
 #include <span>
 
 #include "common/common_types.h"
 #include "common/polyfill_thread.h"
+#include "video_core/cache_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -83,13 +84,16 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size,
+                             VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
     /// Check if the the specified memory area requires flushing to CPU Memory.
-    virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
+    virtual bool MustFlushRegion(VAddr addr, u64 size,
+                                 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size,
+                                  VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
     /// Notify rasterizer that any caches of the specified region are desync with guest
     virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
@@ -105,7 +109,8 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(
+        VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
     /// Notify the host renderer to wait for previous primitive and compute operations.
     virtual void WaitForIdle() = 0;

@@ -39,11 +39,11 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
                                                u32 size) {}
 void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {}
 void RasterizerNull::FlushAll() {}
-void RasterizerNull::FlushRegion(VAddr addr, u64 size) {}
-bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size) {
+void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
+bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {
     return false;
 }
-void RasterizerNull::InvalidateRegion(VAddr addr, u64 size) {}
+void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
 void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
 void RasterizerNull::InvalidateGPUCache() {}
 void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {}
@@ -61,7 +61,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) {
 }
 void RasterizerNull::SignalReference() {}
 void RasterizerNull::ReleaseFences() {}
-void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size) {}
+void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
 void RasterizerNull::WaitForIdle() {}
 void RasterizerNull::FragmentBarrier() {}
 void RasterizerNull::TiledCacheBarrier() {}

@@ -38,9 +38,12 @@ public:
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    bool MustFlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size,
+                     VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    bool MustFlushRegion(VAddr addr, u64 size,
+                         VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    void InvalidateRegion(VAddr addr, u64 size,
+                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
@@ -50,7 +53,8 @@ public:
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
     void ReleaseFences() override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(
+        VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void WaitForIdle() override;
     void FragmentBarrier() override;
     void TiledCacheBarrier() override;

@@ -352,47 +352,61 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
 void RasterizerOpenGL::FlushAll() {}
 
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     if (addr == 0 || size == 0) {
         return;
     }
-    {
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    {
+    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.DownloadMemory(addr, size);
     }
-    query_cache.FlushRegion(addr, size);
+    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+        query_cache.FlushRegion(addr, size);
+    }
 }
 
-bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
-    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+        std::scoped_lock lock{buffer_cache.mutex};
+        if (buffer_cache.IsRegionGpuModified(addr, size)) {
+            return true;
+        }
+    }
     if (!Settings::IsGPULevelHigh()) {
-        return buffer_cache.IsRegionGpuModified(addr, size);
+        return false;
     }
-    return texture_cache.IsRegionGpuModified(addr, size) ||
-           buffer_cache.IsRegionGpuModified(addr, size);
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+        std::scoped_lock lock{texture_cache.mutex};
+        return texture_cache.IsRegionGpuModified(addr, size);
+    }
+    return false;
 }
 
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     if (addr == 0 || size == 0) {
         return;
     }
-    {
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
     }
-    {
+    if (bool(which & VideoCommon::CacheType::BufferCache)) {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
     }
-    shader_cache.InvalidateRegion(addr, size);
-    query_cache.InvalidateRegion(addr, size);
+    if (bool(which & VideoCommon::CacheType::ShaderCache)) {
+        shader_cache.InvalidateRegion(addr, size);
+    }
+    if (bool(which & VideoCommon::CacheType::QueryCache)) {
+        query_cache.InvalidateRegion(addr, size);
+    }
 }
 
 void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -458,11 +472,12 @@ void RasterizerOpenGL::ReleaseFences() {
     fence_manager.WaitPendingFences();
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size,
+                                                VideoCommon::CacheType which) {
     if (Settings::IsGPULevelExtreme()) {
-        FlushRegion(addr, size);
+        FlushRegion(addr, size, which);
     }
-    InvalidateRegion(addr, size);
+    InvalidateRegion(addr, size, which);
 }
 
 void RasterizerOpenGL::WaitForIdle() {
@@ -531,7 +546,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
     }
     gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
     {
-        std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+        std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
         if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
             buffer_cache.WriteMemory(*cpu_addr, copy_size);
         }

@@ -77,9 +77,12 @@ public:
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    bool MustFlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size,
+                     VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    bool MustFlushRegion(VAddr addr, u64 size,
+                         VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    void InvalidateRegion(VAddr addr, u64 size,
+                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
@@ -89,7 +92,9 @@ public:
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
    void ReleaseFences() override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(
+        VAddr addr, u64 size,
+        VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void WaitForIdle() override;
     void FragmentBarrier() override;
     void TiledCacheBarrier() override;

@@ -423,42 +423,59 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in
 void RasterizerVulkan::FlushAll() {}
 
-void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
+void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
     if (addr == 0 || size == 0) {
         return;
     }
-    {
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    {
+    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.DownloadMemory(addr, size);
     }
-    query_cache.FlushRegion(addr, size);
+    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+        query_cache.FlushRegion(addr, size);
+    }
 }
 
-bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
-    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
-    return texture_cache.IsRegionGpuModified(addr, size) ||
-           buffer_cache.IsRegionGpuModified(addr, size);
+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+        std::scoped_lock lock{buffer_cache.mutex};
+        if (buffer_cache.IsRegionGpuModified(addr, size)) {
+            return true;
+        }
+    }
+    if (!Settings::IsGPULevelHigh()) {
+        return false;
+    }
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+        std::scoped_lock lock{texture_cache.mutex};
+        return texture_cache.IsRegionGpuModified(addr, size);
+    }
+    return false;
 }
 
-void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
     if (addr == 0 || size == 0) {
         return;
     }
-    {
+    if (bool(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
-    {
+    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.WriteMemory(addr, size);
     }
-    pipeline_cache.InvalidateRegion(addr, size);
-    query_cache.InvalidateRegion(addr, size);
+    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+        query_cache.InvalidateRegion(addr, size);
+    }
+    if ((bool(which & VideoCommon::CacheType::ShaderCache))) {
+        pipeline_cache.InvalidateRegion(addr, size);
+    }
 }
 
 void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
@@ -522,11 +539,12 @@ void RasterizerVulkan::ReleaseFences() {
     fence_manager.WaitPendingFences();
 }
 
-void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
+                                                VideoCommon::CacheType which) {
     if (Settings::IsGPULevelExtreme()) {
-        FlushRegion(addr, size);
+        FlushRegion(addr, size, which);
     }
-    InvalidateRegion(addr, size);
+    InvalidateRegion(addr, size, which);
 }
 
 void RasterizerVulkan::WaitForIdle() {
@@ -602,7 +620,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
     }
     gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
     {
-        std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+        std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
         if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
             buffer_cache.WriteMemory(*cpu_addr, copy_size);
         }

@@ -73,9 +73,12 @@ public:
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    bool MustFlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size,
+                     VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    bool MustFlushRegion(VAddr addr, u64 size,
+                         VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+    void InvalidateRegion(VAddr addr, u64 size,
+                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
@@ -85,7 +88,9 @@ public:
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
     void ReleaseFences() override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(
+        VAddr addr, u64 size,
+        VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void WaitForIdle() override;
     void FragmentBarrier() override;
     void TiledCacheBarrier() override;

@@ -740,7 +740,8 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
-        gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
+                              VideoCommon::CacheType::NoTextureCache);
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
         return;
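
Switching from ReadBlockUnsafe to ReadBlock routes this read through the safe path, which first flushes GPU-modified data for the range; the NoTextureCache filter keeps that flush from recursing into the texture cache for the very image being uploaded. A small sanity check against cache_types.h above (illustrative, assuming the flag operators generated by DECLARE_ENUM_FLAG_OPERATORS are constexpr):

    // NoTextureCache covers every cache except the texture cache, so the
    // rasterizer's FlushRegion still downloads buffer-cache data and flushes
    // queries for this range while skipping texture_cache.DownloadMemory().
    static_assert(VideoCommon::CacheType::NoTextureCache ==
                  (VideoCommon::CacheType::QueryCache | VideoCommon::CacheType::BufferCache |
                   VideoCommon::CacheType::ShaderCache));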

@@ -203,7 +203,7 @@ public:
     /// Create channel state.
     void CreateChannel(Tegra::Control::ChannelState& channel) final override;
 
-    std::mutex mutex;
+    std::recursive_mutex mutex;
 
 private:
     /// Iterate over all page indices in a range