Merge pull request #4633 from ReinUsesLisp/gpu-init

video_core: Remove all Core::System references in renderer
master
Rodrigo Locatti 2020-09-10 02:28:54 +07:00 committed by GitHub
commit 663ea382da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
53 changed files with 573 additions and 633 deletions

@ -188,7 +188,6 @@ struct System::Impl {
if (!gpu_core) { if (!gpu_core) {
return ResultStatus::ErrorVideoCore; return ResultStatus::ErrorVideoCore;
} }
gpu_core->Renderer().Rasterizer().SetupDirtyFlags();
is_powered_on = true; is_powered_on = true;
exit_lock = false; exit_lock = false;

@ -51,46 +51,43 @@ public:
bool is_written = false, bool use_fast_cbuf = false) { bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr) {
if (!cpu_addr_opt) {
return GetEmptyBuffer(size); return GetEmptyBuffer(size);
} }
const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size. // Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games. // TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800; constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) { if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) { if (use_fast_cbuf) {
u8* dest; u8* dest;
if (is_granular) { if (is_granular) {
dest = memory_manager.GetPointer(gpu_addr); dest = gpu_memory.GetPointer(gpu_addr);
} else { } else {
staging_buffer.resize(size); staging_buffer.resize(size);
dest = staging_buffer.data(); dest = staging_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
} }
return ConstBufferUpload(dest, size); return ConstBufferUpload(dest, size);
} }
if (is_granular) { if (is_granular) {
u8* const host_ptr = memory_manager.GetPointer(gpu_addr); u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
std::memcpy(dest, host_ptr, size); std::memcpy(dest, host_ptr, size);
}); });
} else { } else {
return StreamBufferUpload( return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
}); });
} }
} }
} }
Buffer* const block = GetBlock(cpu_addr, size); Buffer* const block = GetBlock(*cpu_addr, size);
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
if (!map) { if (!map) {
return GetEmptyBuffer(size); return GetEmptyBuffer(size);
} }
@ -106,7 +103,7 @@ public:
} }
} }
return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
} }
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@ -262,9 +259,11 @@ public:
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected: protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
std::unique_ptr<StreamBuffer> stream_buffer) Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} std::unique_ptr<StreamBuffer> stream_buffer_)
: rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
~BufferCache() = default; ~BufferCache() = default;
@ -326,14 +325,13 @@ private:
MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) { if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
if (memory_manager.IsGranularRange(gpu_addr, size)) { if (gpu_memory.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr); u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
block->Upload(block->Offset(cpu_addr), size, host_ptr); block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else { } else {
staging_buffer.resize(size); staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
} }
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
@ -392,7 +390,7 @@ private:
continue; continue;
} }
staging_buffer.resize(size); staging_buffer.resize(size);
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
} }
} }
@ -431,7 +429,7 @@ private:
const std::size_t size = map->end - map->start; const std::size_t size = map->end - map->start;
staging_buffer.resize(size); staging_buffer.resize(size);
block->Download(block->Offset(map->start), size, staging_buffer.data()); block->Download(block->Offset(map->start), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0); map->MarkAsModified(false, 0);
} }
@ -567,7 +565,8 @@ private:
} }
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Core::System& system; Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer; std::unique_ptr<StreamBuffer> stream_buffer;
BufferType stream_buffer_handle; BufferType stream_buffer_handle;

@ -74,8 +74,6 @@ public:
} }
void WaitPendingFences() { void WaitPendingFences() {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) { while (!fences.empty()) {
TFence& current_fence = fences.front(); TFence& current_fence = fences.front();
if (ShouldWait()) { if (ShouldWait()) {
@ -83,7 +81,7 @@ public:
} }
PopAsyncFlushes(); PopAsyncFlushes();
if (current_fence->IsSemaphore()) { if (current_fence->IsSemaphore()) {
memory_manager.template Write<u32>(current_fence->GetAddress(), gpu_memory.template Write<u32>(current_fence->GetAddress(),
current_fence->GetPayload()); current_fence->GetPayload());
} else { } else {
gpu.IncrementSyncPoint(current_fence->GetPayload()); gpu.IncrementSyncPoint(current_fence->GetPayload());
@ -93,13 +91,13 @@ public:
} }
protected: protected:
FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache, TTBufferCache& buffer_cache, TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TQueryCache& query_cache) TQueryCache& query_cache_)
: system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
buffer_cache{buffer_cache}, query_cache{query_cache} {} texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
virtual ~FenceManager() {} virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true /// true
@ -113,16 +111,15 @@ protected:
/// Waits until a fence has been signalled by the host GPU. /// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0; virtual void WaitFence(TFence& fence) = 0;
Core::System& system;
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache; TTextureCache& texture_cache;
TTBufferCache& buffer_cache; TTBufferCache& buffer_cache;
TQueryCache& query_cache; TQueryCache& query_cache;
private: private:
void TryReleasePendingFences() { void TryReleasePendingFences() {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) { while (!fences.empty()) {
TFence& current_fence = fences.front(); TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) { if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@ -130,7 +127,7 @@ private:
} }
PopAsyncFlushes(); PopAsyncFlushes();
if (current_fence->IsSemaphore()) { if (current_fence->IsSemaphore()) {
memory_manager.template Write<u32>(current_fence->GetAddress(), gpu_memory.template Write<u32>(current_fence->GetAddress(),
current_fence->GetPayload()); current_fence->GetPayload());
} else { } else {
gpu.IncrementSyncPoint(current_fence->GetPayload()); gpu.IncrementSyncPoint(current_fence->GetPayload());

@ -28,8 +28,8 @@ namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system_, bool is_async_) GPU::GPU(Core::System& system_, bool is_async_)
: system{system_}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()}, fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},

@ -347,12 +347,11 @@ private:
protected: protected:
Core::System& system; Core::System& system;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer; std::unique_ptr<VideoCore::RendererBase> renderer;
private: private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids /// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {}; std::array<EngineID, 8> bound_engines = {};
/// 3D engine /// 3D engine

@ -95,9 +95,11 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo
class QueryPool> class QueryPool>
class QueryCacheBase { class QueryCacheBase {
public: public:
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{ Tegra::Engines::Maxwell3D& maxwell3d_,
static_cast<QueryCache&>(*this), Tegra::MemoryManager& gpu_memory_)
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {} VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) { void InvalidateRegion(VAddr addr, std::size_t size) {
@ -118,29 +120,27 @@ public:
*/ */
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr);
ASSERT(cpu_addr_opt);
VAddr cpu_addr = *cpu_addr_opt;
CachedQuery* query = TryGet(cpu_addr); CachedQuery* query = TryGet(*cpu_addr);
if (!query) { if (!query) {
ASSERT_OR_EXECUTE(cpu_addr_opt, return;); ASSERT_OR_EXECUTE(cpu_addr, return;);
const auto host_ptr = memory_manager.GetPointer(gpu_addr); u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
} }
query->BindCounter(Stream(type).Current(), timestamp); query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushQuery(cpu_addr); AsyncFlushQuery(*cpu_addr);
} }
} }
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() { void UpdateCounters() {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
} }
@ -270,8 +270,9 @@ private:
static constexpr std::uintptr_t PAGE_SIZE = 4096; static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_BITS = 12; static constexpr unsigned PAGE_BITS = 12;
Core::System& system;
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex; std::recursive_mutex mutex;

@ -106,11 +106,8 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated /// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const DiskResourceLoadCallback& callback = {}) {} const DiskResourceLoadCallback& callback) {}
/// Initializes renderer dirty flags
virtual void SetupDirtyFlags() {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() { GuestDriverProfile& AccessGuestDriverProfile() {

@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
} }
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device_, std::size_t stream_size) const Device& device_, std::size_t stream_size)
: GenericBufferCache{rasterizer, system, : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
device{device_} { device{device_} {
if (!device.HasFastBufferSubData()) { if (!device.HasFastBufferSubData()) {

@ -52,7 +52,8 @@ private:
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache { class OGLBufferCache final : public GenericBufferCache {
public: public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, std::size_t stream_size); const Device& device, std::size_t stream_size);
~OGLBufferCache(); ~OGLBufferCache();

@ -45,11 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
} }
FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
VideoCore::RasterizerInterface& rasterizer,
TextureCacheOpenGL& texture_cache, TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache) OGLBufferCache& buffer_cache, QueryCache& query_cache)
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed); return std::make_shared<GLInnerFence>(value, is_stubbed);

@ -37,7 +37,7 @@ using GenericFenceManager =
class FenceManagerOpenGL final : public GenericFenceManager { class FenceManagerOpenGL final : public GenericFenceManager {
public: public:
FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
QueryCache& query_cache); QueryCache& query_cache);

@ -30,12 +30,13 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace } // Anonymous namespace
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory)
: VideoCommon::QueryCacheBase< : VideoCommon::QueryCacheBase<
QueryCache, CachedQuery, CounterStream, HostCounter, QueryCache, CachedQuery, CounterStream, HostCounter,
std::vector<OGLQuery>>{system, std::vector<OGLQuery>>{static_cast<VideoCore::RasterizerInterface&>(rasterizer),
static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, maxwell3d, gpu_memory},
gl_rasterizer{gl_rasterizer} {} gl_rasterizer{rasterizer} {}
QueryCache::~QueryCache() = default; QueryCache::~QueryCache() = default;

@ -29,7 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
HostCounter, std::vector<OGLQuery>> { HostCounter, std::vector<OGLQuery>> {
public: public:
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory);
~QueryCache(); ~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type); OGLQuery AllocateQuery(VideoCore::QueryType type);

@ -153,16 +153,19 @@ void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t nu
} // Anonymous namespace } // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
const Device& device, ScreenInfo& info, Core::Memory::Memory& cpu_memory, const Device& device_,
ProgramManager& program_manager, StateTracker& state_tracker) ScreenInfo& screen_info_, ProgramManager& program_manager_,
: RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, StateTracker& state_tracker_)
state_tracker}, : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
async_shaders{emu_window} { query_cache(*this, maxwell3d, gpu_memory),
buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window) {
CheckExtensions(); CheckExtensions();
unified_uniform_buffer.Create(); unified_uniform_buffer.Create();
@ -196,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() {
} }
void RasterizerOpenGL::SetupVertexFormat() { void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::VertexFormats]) { if (!flags[Dirty::VertexFormats]) {
return; return;
} }
@ -217,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
} }
flags[Dirty::VertexFormat0 + index] = false; flags[Dirty::VertexFormat0 + index] = false;
const auto attrib = gpu.regs.vertex_attrib_format[index]; const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index); const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes. // Disable constant attributes.
@ -241,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
} }
void RasterizerOpenGL::SetupVertexBuffer() { void RasterizerOpenGL::SetupVertexBuffer() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::VertexBuffers]) { if (!flags[Dirty::VertexBuffers]) {
return; return;
} }
@ -253,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) { if (!flags[Dirty::VertexBuffer0 + index]) {
continue; continue;
@ -290,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() {
} }
void RasterizerOpenGL::SetupVertexInstances() { void RasterizerOpenGL::SetupVertexInstances() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::VertexInstances]) { if (!flags[Dirty::VertexInstances]) {
return; return;
} }
flags[Dirty::VertexInstances] = false; flags[Dirty::VertexInstances] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) { if (!flags[Dirty::VertexInstance0 + index]) {
continue; continue;
@ -313,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
GLintptr RasterizerOpenGL::SetupIndexBuffer() { GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index); MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize(); const std::size_t size = CalculateIndexBufferSize();
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
@ -322,15 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader); MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
u32 clip_distances = 0; u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index]; const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)}; const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled // Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) { switch (program) {
case Maxwell::ShaderProgram::Geometry: case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0); program_manager.UseGeometryShader(0);
@ -391,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
} }
SyncClipEnabled(clip_distances); SyncClipEnabled(clip_distances);
gpu.dirty.flags[Dirty::Shaders] = false; maxwell3d.dirty.flags[Dirty::Shaders] = false;
} }
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
std::size_t size = 0; std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@ -413,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
} }
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
const auto& regs = system.GPU().Maxwell3D().regs; return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
return static_cast<std::size_t>(regs.index_array.count) *
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
} }
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) { const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(stop_loading, callback); shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
void RasterizerOpenGL::SetupDirtyFlags() {
state_tracker.Initialize();
} }
void RasterizerOpenGL::ConfigureFramebuffers() { void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer); MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = system.GPU().Maxwell3D(); if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return; return;
} }
gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true); texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true); View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces // Bind the framebuffer surfaces
@ -472,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
} }
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
auto& gpu = system.GPU().Maxwell3D(); const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
texture_cache.GuardRenderTargets(true); texture_cache.GuardRenderTargets(true);
View color_surface; View color_surface;
@ -523,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de
} }
void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Clear() {
const auto& gpu = system.GPU().Maxwell3D(); if (!maxwell3d.ShouldExecute()) {
if (!gpu.ShouldExecute()) {
return; return;
} }
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
bool use_color{}; bool use_color{};
bool use_depth{}; bool use_depth{};
bool use_stencil{}; bool use_stencil{};
@ -593,7 +583,6 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing); MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
query_cache.UpdateCounters(); query_cache.UpdateCounters();
@ -641,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (invalidated) { if (invalidated) {
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
auto& dirty = gpu.dirty.flags; auto& dirty = maxwell3d.dirty.flags;
dirty[Dirty::VertexBuffers] = true; dirty[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
dirty[index] = true; dirty[index] = true;
@ -662,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup emulation uniform buffer. // Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) { if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo; MaxwellUniformData ubo;
ubo.SetFromRegs(gpu); ubo.SetFromRegs(maxwell3d);
const auto info = const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
@ -671,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup shaders and their used resources. // Setup shaders and their used resources.
texture_cache.GuardSamplers(true); texture_cache.GuardSamplers(true);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
SetupShaders(primitive_mode); SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false); texture_cache.GuardSamplers(false);
@ -688,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(primitive_mode); BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances = const GLsizei num_instances =
static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
if (is_indexed) { if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset); glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) { } else if (num_instances == 1 && base_instance == 0) {
@ -714,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance); base_instance);
} }
} else { } else {
const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) { if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices); glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) { } else if (base_instance == 0) {
@ -730,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands; ++num_queued_commands;
system.GPU().TickWork(); gpu.TickWork();
} }
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@ -753,7 +742,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap(); buffer_cache.Unmap();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = kepler_compute.launch_description;
program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands; ++num_queued_commands;
} }
@ -815,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() {
} }
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
auto& memory_manager{gpu.MemoryManager()}; gpu_memory.Write<u32>(addr, value);
memory_manager.Write<u32>(addr, value);
return; return;
} }
fence_manager.SignalSemaphore(addr, value); fence_manager.SignalSemaphore(addr, value);
} }
void RasterizerOpenGL::SignalSyncPoint(u32 value) { void RasterizerOpenGL::SignalSyncPoint(u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value); gpu.IncrementSyncPoint(value);
return; return;
@ -834,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
} }
void RasterizerOpenGL::ReleaseFences() { void RasterizerOpenGL::ReleaseFences() {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
return; return;
} }
@ -920,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index]; const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries(); const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms; const bool use_unified = entries.use_unified_uniforms;
@ -945,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries(); const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms; const bool use_unified = entries.use_unified_uniforms;
@ -1018,9 +1004,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
}; };
auto& gpu{system.GPU()}; const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
auto& memory_manager{gpu.MemoryManager()};
const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries}; const auto& entries{shader->GetEntries().global_memory_entries};
std::array<GLuint64EXT, 32> pointers; std::array<GLuint64EXT, 32> pointers;
@ -1030,8 +1014,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : entries) { for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{memory_manager.Read<u32>(addr + 8)}; const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
++binding; ++binding;
} }
@ -1041,9 +1025,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
} }
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
auto& gpu{system.GPU()}; const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
auto& memory_manager{gpu.MemoryManager()};
const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries}; const auto& entries{kernel->GetEntries().global_memory_entries};
std::array<GLuint64EXT, 32> pointers; std::array<GLuint64EXT, 32> pointers;
@ -1052,8 +1034,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
u32 binding = 0; u32 binding = 0;
for (const auto& entry : entries) { for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
const u32 size{memory_manager.Read<u32>(addr + 8)}; const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
++binding; ++binding;
} }
@ -1077,7 +1059,6 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture); MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler; u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetEntries().samplers) { for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index); const auto shader_type = static_cast<ShaderType>(stage_index);
@ -1090,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture); MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0; u32 binding = 0;
for (const auto& entry : kernel->GetEntries().samplers) { for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.size; ++i) { for (std::size_t i = 0; i < entry.size; ++i) {
const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry); SetupTexture(binding++, texture, entry);
} }
} }
@ -1118,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
} }
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image; u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) { for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); const auto shader_type = static_cast<ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry); SetupImage(binding++, tic, entry);
} }
} }
void RasterizerOpenGL::SetupComputeImages(Shader* shader) { void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0; u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) { for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
SetupImage(binding++, tic, entry); SetupImage(binding++, tic, entry);
} }
} }
@ -1151,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
} }
void RasterizerOpenGL::SyncViewport() { void RasterizerOpenGL::SyncViewport() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags; const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
const bool dirty_viewport = flags[Dirty::Viewports]; const bool dirty_viewport = flags[Dirty::Viewports];
const bool dirty_clip_control = flags[Dirty::ClipControl]; const bool dirty_clip_control = flags[Dirty::ClipControl];
@ -1225,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() {
} }
void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncDepthClamp() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) { if (!flags[Dirty::DepthClampEnabled]) {
return; return;
} }
flags[Dirty::DepthClampEnabled] = false; flags[Dirty::DepthClampEnabled] = false;
oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0); oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
} }
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
return; return;
} }
flags[Dirty::ClipDistances] = false; flags[Dirty::ClipDistances] = false;
clip_mask &= gpu.regs.clip_distance_enabled; clip_mask &= maxwell3d.regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) { if (clip_mask == last_clip_distance_mask) {
return; return;
} }
@ -1259,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() {
} }
void RasterizerOpenGL::SyncCullMode() { void RasterizerOpenGL::SyncCullMode() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags; const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
if (flags[Dirty::CullTest]) { if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false; flags[Dirty::CullTest] = false;
@ -1276,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() {
} }
void RasterizerOpenGL::SyncPrimitiveRestart() { void RasterizerOpenGL::SyncPrimitiveRestart() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) { if (!flags[Dirty::PrimitiveRestart]) {
return; return;
} }
flags[Dirty::PrimitiveRestart] = false; flags[Dirty::PrimitiveRestart] = false;
if (gpu.regs.primitive_restart.enabled) { if (maxwell3d.regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART); glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
} else { } else {
glDisable(GL_PRIMITIVE_RESTART); glDisable(GL_PRIMITIVE_RESTART);
} }
} }
void RasterizerOpenGL::SyncDepthTestState() { void RasterizerOpenGL::SyncDepthTestState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags; const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
if (flags[Dirty::DepthMask]) { if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false; flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@ -1313,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
} }
void RasterizerOpenGL::SyncStencilTestState() { void RasterizerOpenGL::SyncStencilTestState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::StencilTest]) { if (!flags[Dirty::StencilTest]) {
return; return;
} }
flags[Dirty::StencilTest] = false; flags[Dirty::StencilTest] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable); oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@ -1345,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
} }
void RasterizerOpenGL::SyncRasterizeEnable() { void RasterizerOpenGL::SyncRasterizeEnable() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::RasterizeEnable]) { if (!flags[Dirty::RasterizeEnable]) {
return; return;
} }
flags[Dirty::RasterizeEnable] = false; flags[Dirty::RasterizeEnable] = false;
oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
} }
void RasterizerOpenGL::SyncPolygonModes() { void RasterizerOpenGL::SyncPolygonModes() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PolygonModes]) { if (!flags[Dirty::PolygonModes]) {
return; return;
} }
flags[Dirty::PolygonModes] = false; flags[Dirty::PolygonModes] = false;
if (gpu.regs.fill_rectangle) { const auto& regs = maxwell3d.regs;
if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) { if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported"); LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@ -1376,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
return; return;
} }
if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) { if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false; flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false; flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return; return;
} }
if (flags[Dirty::PolygonModeFront]) { if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false; flags[Dirty::PolygonModeFront] = false;
glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
} }
if (flags[Dirty::PolygonModeBack]) { if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false; flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
} }
} }
void RasterizerOpenGL::SyncColorMask() { void RasterizerOpenGL::SyncColorMask() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::ColorMasks]) { if (!flags[Dirty::ColorMasks]) {
return; return;
} }
@ -1405,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon]; const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false; flags[Dirty::ColorMaskCommon] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
if (regs.color_mask_common) { if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) { if (!force && !flags[Dirty::ColorMask0]) {
return; return;
@ -1430,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() {
} }
void RasterizerOpenGL::SyncMultiSampleState() { void RasterizerOpenGL::SyncMultiSampleState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::MultisampleControl]) { if (!flags[Dirty::MultisampleControl]) {
return; return;
} }
flags[Dirty::MultisampleControl] = false; flags[Dirty::MultisampleControl] = false;
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
} }
void RasterizerOpenGL::SyncFragmentColorClampState() { void RasterizerOpenGL::SyncFragmentColorClampState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::FragmentClampColor]) { if (!flags[Dirty::FragmentClampColor]) {
return; return;
} }
flags[Dirty::FragmentClampColor] = false; flags[Dirty::FragmentClampColor] = false;
glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
} }
void RasterizerOpenGL::SyncBlendState() { void RasterizerOpenGL::SyncBlendState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags; const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
if (flags[Dirty::BlendColor]) { if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false; flags[Dirty::BlendColor] = false;
@ -1513,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() {
} }
void RasterizerOpenGL::SyncLogicOpState() { void RasterizerOpenGL::SyncLogicOpState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::LogicOp]) { if (!flags[Dirty::LogicOp]) {
return; return;
} }
flags[Dirty::LogicOp] = false; flags[Dirty::LogicOp] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
if (regs.logic_op.enable) { if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP); glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@ -1530,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
} }
void RasterizerOpenGL::SyncScissorTest() { void RasterizerOpenGL::SyncScissorTest() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::Scissors]) { if (!flags[Dirty::Scissors]) {
return; return;
} }
flags[Dirty::Scissors] = false; flags[Dirty::Scissors] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) { if (!flags[Dirty::Scissor0 + index]) {
continue; continue;
@ -1556,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() {
} }
void RasterizerOpenGL::SyncPointState() { void RasterizerOpenGL::SyncPointState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PointSize]) { if (!flags[Dirty::PointSize]) {
return; return;
} }
flags[Dirty::PointSize] = false; flags[Dirty::PointSize] = false;
oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
if (gpu.regs.vp_point_size.enable) { if (maxwell3d.regs.vp_point_size.enable) {
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
glEnable(GL_PROGRAM_POINT_SIZE); glEnable(GL_PROGRAM_POINT_SIZE);
return; return;
@ -1573,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() {
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL). // in OpenGL).
glPointSize(std::max(1.0f, gpu.regs.point_size)); glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
glDisable(GL_PROGRAM_POINT_SIZE); glDisable(GL_PROGRAM_POINT_SIZE);
} }
void RasterizerOpenGL::SyncLineState() { void RasterizerOpenGL::SyncLineState() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::LineWidth]) { if (!flags[Dirty::LineWidth]) {
return; return;
} }
flags[Dirty::LineWidth] = false; flags[Dirty::LineWidth] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable); oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased); glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
} }
void RasterizerOpenGL::SyncPolygonOffset() { void RasterizerOpenGL::SyncPolygonOffset() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PolygonOffset]) { if (!flags[Dirty::PolygonOffset]) {
return; return;
} }
flags[Dirty::PolygonOffset] = false; flags[Dirty::PolygonOffset] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@ -1612,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
} }
void RasterizerOpenGL::SyncAlphaTest() { void RasterizerOpenGL::SyncAlphaTest() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::AlphaTest]) { if (!flags[Dirty::AlphaTest]) {
return; return;
} }
flags[Dirty::AlphaTest] = false; flags[Dirty::AlphaTest] = false;
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
if (regs.alpha_test_enabled && regs.rt_control.count > 1) { if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
} }
@ -1633,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() {
} }
void RasterizerOpenGL::SyncFramebufferSRGB() { void RasterizerOpenGL::SyncFramebufferSRGB() {
auto& gpu = system.GPU().Maxwell3D(); auto& flags = maxwell3d.dirty.flags;
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) { if (!flags[Dirty::FramebufferSRGB]) {
return; return;
} }
flags[Dirty::FramebufferSRGB] = false; flags[Dirty::FramebufferSRGB] = false;
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
} }
void RasterizerOpenGL::SyncTransformFeedback() { void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required. // when this is required.
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
static constexpr std::size_t STRIDE = 3; static constexpr std::size_t STRIDE = 3;
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
@ -1698,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
} }
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) { if (regs.tfb_enabled == 0) {
return; return;
} }
@ -1741,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
} }
void RasterizerOpenGL::EndTransformFeedback() { void RasterizerOpenGL::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) { if (regs.tfb_enabled == 0) {
return; return;
} }

@ -36,8 +36,8 @@
#include "video_core/shader/async_shaders.h" #include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
namespace Core { namespace Core::Memory {
class System; class Memory;
} }
namespace Core::Frontend { namespace Core::Frontend {
@ -55,9 +55,10 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public: public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
const Device& device, ScreenInfo& info, Core::Memory::Memory& cpu_memory, const Device& device,
ProgramManager& program_manager, StateTracker& state_tracker); ScreenInfo& screen_info, ProgramManager& program_manager,
StateTracker& state_tracker);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override; void Draw(bool is_indexed, bool is_instanced) override;
@ -83,9 +84,8 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override; const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override; u32 pixel_stride) override;
void LoadDiskResources(const std::atomic_bool& stop_loading, void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override; const VideoCore::DiskResourceLoadCallback& callback) override;
void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server. /// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const { bool AnyCommandQueued() const {
@ -237,7 +237,15 @@ private:
void SetupShaders(GLenum primitive_mode); void SetupShaders(GLenum primitive_mode);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
const Device& device; const Device& device;
ScreenInfo& screen_info;
ProgramManager& program_manager;
StateTracker& state_tracker;
TextureCacheOpenGL texture_cache; TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache; ShaderCacheOpenGL shader_cache;
@ -247,10 +255,6 @@ private:
OGLBufferCache buffer_cache; OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager; FenceManagerOpenGL fence_manager;
Core::System& system;
ScreenInfo& screen_info;
ProgramManager& program_manager;
StateTracker& state_tracker;
VideoCommon::Shader::AsyncShaders async_shaders; VideoCommon::Shader::AsyncShaders async_shaders;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;

@ -239,12 +239,11 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type); const auto shader_type = GetShaderType(program_type);
auto& gpu = params.system.GPU(); auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding(); gpu.ShaderNotify().MarkSharderBuilding();
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
if (!async_shaders.IsShaderAsync(params.system.GPU()) || if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
!params.device.UseAsynchronousShaders()) {
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders // TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b; // std::optional<ShaderIR> ir_b;
@ -287,11 +286,10 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code) { ProgramCode code) {
auto& gpu = params.system.GPU(); auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding(); gpu.ShaderNotify().MarkSharderBuilding();
auto& engine = gpu.KeplerCompute(); auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier; const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@ -320,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
} }
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
Core::Frontend::EmuWindow& emu_window, const Device& device) Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, Tegra::Engines::Maxwell3D& maxwell3d_,
emu_window{emu_window}, device{device}, disk_cache{system} {} Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_)
: VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) { const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable(); const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) { if (!transferable) {
return; return;
@ -481,21 +484,19 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders) { VideoCommon::Shader::AsyncShaders& async_shaders) {
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
if (last_shader->IsBuilt()) { if (last_shader->IsBuilt()) {
return last_shader; return last_shader;
} }
} }
auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
const GPUVAddr address{GetShaderAddress(system, program)};
if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
auto completed_work = async_shaders.GetCompletedWork(); auto completed_work = async_shaders.GetCompletedWork();
for (auto& work : completed_work) { for (auto& work : completed_work) {
Shader* shader = TryGet(work.cpu_address); Shader* shader = TryGet(work.cpu_address);
auto& gpu = system.GPU();
gpu.ShaderNotify().MarkShaderComplete(); gpu.ShaderNotify().MarkShaderComplete();
if (shader == nullptr) { if (shader == nullptr) {
continue; continue;
@ -507,14 +508,13 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
shader->AsyncGLASMBuilt(std::move(work.program.glasm)); shader->AsyncGLASMBuilt(std::move(work.program.glasm));
} }
auto& registry = shader->GetRegistry();
ShaderDiskCacheEntry entry; ShaderDiskCacheEntry entry;
entry.type = work.shader_type; entry.type = work.shader_type;
entry.code = std::move(work.code); entry.code = std::move(work.code);
entry.code_b = std::move(work.code_b); entry.code_b = std::move(work.code_b);
entry.unique_identifier = work.uid; entry.unique_identifier = work.uid;
auto& registry = shader->GetRegistry();
entry.bound_buffer = registry.GetBoundBuffer(); entry.bound_buffer = registry.GetBoundBuffer();
entry.graphics_info = registry.GetGraphicsInfo(); entry.graphics_info = registry.GetGraphicsInfo();
entry.keys = registry.GetKeys(); entry.keys = registry.GetKeys();
@ -525,27 +525,27 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
} }
// Look up shader in the cache based on address // Look up shader in the cache based on address
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader; return last_shaders[static_cast<std::size_t>(program)] = shader;
} }
const auto host_ptr{memory_manager.GetPointer(address)}; const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one // No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)}; ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b; ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) { if (program == Maxwell::ShaderProgram::VertexA) {
const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
const u8* host_ptr_b = memory_manager.GetPointer(address_b); const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
} }
const std::size_t code_size = code.size() * sizeof(u64); const std::size_t code_size = code.size() * sizeof(u64);
const u64 unique_identifier = GetUniqueIdentifier( const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> shader; std::unique_ptr<Shader> shader;
@ -568,20 +568,19 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
} }
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()}; const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel; return kernel;
} }
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one // No kernel found, create a new one
ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
const std::size_t code_size{code.size() * sizeof(u64)}; const std::size_t code_size{code.size() * sizeof(u64)};
const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> kernel; std::unique_ptr<Shader> kernel;

@ -25,8 +25,8 @@
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h" #include "video_core/shader_cache.h"
namespace Core { namespace Tegra {
class System; class MemoryManager;
} }
namespace Core::Frontend { namespace Core::Frontend {
@ -57,11 +57,12 @@ struct PrecompiledShader {
}; };
struct ShaderParameters { struct ShaderParameters {
Core::System& system; Tegra::GPU& gpu;
Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache; ShaderDiskCacheOpenGL& disk_cache;
const Device& device; const Device& device;
VAddr cpu_addr; VAddr cpu_addr;
u8* host_ptr; const u8* host_ptr;
u64 unique_identifier; u64 unique_identifier;
}; };
@ -118,12 +119,14 @@ private:
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public: public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
Core::Frontend::EmuWindow& emu_window, const Device& device); Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const Device& device);
~ShaderCacheOpenGL() override; ~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game /// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading, void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback); const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program /// Gets the current specified shader stage program
@ -138,9 +141,13 @@ private:
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats); const std::unordered_set<GLenum>& supported_formats);
Core::System& system;
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device; const Device& device;
ShaderDiskCacheOpenGL disk_cache; ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache; std::unordered_map<u64, PrecompiledShader> runtime_cache;

@ -206,13 +206,17 @@ bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
flat_bindless_samplers.size(); flat_bindless_samplers.size();
} }
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
title_id = title_id_;
}
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id // Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
return std::nullopt; return std::nullopt;
} }
@ -474,7 +478,7 @@ std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
} }
std::string ShaderDiskCacheOpenGL::GetTitleID() const { std::string ShaderDiskCacheOpenGL::GetTitleID() const {
return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID()); return fmt::format("{:016X}", title_id);
} }
} // namespace OpenGL } // namespace OpenGL

@ -21,10 +21,6 @@
#include "video_core/engines/shader_type.h" #include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h" #include "video_core/shader/registry.h"
namespace Core {
class System;
}
namespace Common::FS { namespace Common::FS {
class IOFile; class IOFile;
} }
@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
class ShaderDiskCacheOpenGL { class ShaderDiskCacheOpenGL {
public: public:
explicit ShaderDiskCacheOpenGL(Core::System& system); explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL(); ~ShaderDiskCacheOpenGL();
/// Binds a title ID for all future operations.
void BindTitleID(u64 title_id);
/// Loads transferable cache. If file has a old version or on failure, it deletes the file. /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
@ -157,8 +156,6 @@ private:
return LoadArrayFromPrecompiled(&object, 1); return LoadArrayFromPrecompiled(&object, 1);
} }
Core::System& system;
// Stores whole precompiled cache which will be read from or saved to the precompiled chache // Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file // file
FileSys::VectorVfsFile precompiled_cache_virtual_file; FileSys::VectorVfsFile precompiled_cache_virtual_file;
@ -168,8 +165,11 @@ private:
// Stored transferable shaders // Stored transferable shaders
std::unordered_set<u64> stored_transferable; std::unordered_set<u64> stored_transferable;
/// Title ID to operate on
u64 title_id = 0;
// The cache has been loaded at boot // The cache has been loaded at boot
bool is_usable{}; bool is_usable = false;
}; };
} // namespace OpenGL } // namespace OpenGL

@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace } // Anonymous namespace
StateTracker::StateTracker(Core::System& system) : system{system} {} StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
void StateTracker::Initialize() {
auto& dirty = system.GPU().Maxwell3D().dirty;
auto& tables = dirty.tables; auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables); SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables); SetupDirtyColorMasks(tables);

@ -13,8 +13,8 @@
#include "video_core/dirty_flags.h" #include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
namespace Core { namespace Tegra {
class System; class GPU;
} }
namespace OpenGL { namespace OpenGL {
@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker { class StateTracker {
public: public:
explicit StateTracker(Core::System& system); explicit StateTracker(Tegra::GPU& gpu);
void Initialize();
void BindIndexBuffer(GLuint new_index_buffer) { void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) { if (index_buffer == new_index_buffer) {
@ -103,7 +101,6 @@ public:
} }
void NotifyScreenDrawVertexArray() { void NotifyScreenDrawVertexArray() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true; flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
@ -117,98 +114,81 @@ public:
} }
void NotifyPolygonModes() { void NotifyPolygonModes() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true; flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true; flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true; flags[OpenGL::Dirty::PolygonModeBack] = true;
} }
void NotifyViewport0() { void NotifyViewport0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true; flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true; flags[OpenGL::Dirty::Viewport0] = true;
} }
void NotifyScissor0() { void NotifyScissor0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true; flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true; flags[OpenGL::Dirty::Scissor0] = true;
} }
void NotifyColorMask0() { void NotifyColorMask0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ColorMasks] = true; flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true; flags[OpenGL::Dirty::ColorMask0] = true;
} }
void NotifyBlend0() { void NotifyBlend0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true; flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true; flags[OpenGL::Dirty::BlendState0] = true;
} }
void NotifyFramebuffer() { void NotifyFramebuffer() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true; flags[VideoCommon::Dirty::RenderTargets] = true;
} }
void NotifyFrontFace() { void NotifyFrontFace() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true; flags[OpenGL::Dirty::FrontFace] = true;
} }
void NotifyCullTest() { void NotifyCullTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true; flags[OpenGL::Dirty::CullTest] = true;
} }
void NotifyDepthMask() { void NotifyDepthMask() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true; flags[OpenGL::Dirty::DepthMask] = true;
} }
void NotifyDepthTest() { void NotifyDepthTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true; flags[OpenGL::Dirty::DepthTest] = true;
} }
void NotifyStencilTest() { void NotifyStencilTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true; flags[OpenGL::Dirty::StencilTest] = true;
} }
void NotifyPolygonOffset() { void NotifyPolygonOffset() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true; flags[OpenGL::Dirty::PolygonOffset] = true;
} }
void NotifyRasterizeEnable() { void NotifyRasterizeEnable() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true; flags[OpenGL::Dirty::RasterizeEnable] = true;
} }
void NotifyFramebufferSRGB() { void NotifyFramebufferSRGB() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true; flags[OpenGL::Dirty::FramebufferSRGB] = true;
} }
void NotifyLogicOp() { void NotifyLogicOp() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true; flags[OpenGL::Dirty::LogicOp] = true;
} }
void NotifyClipControl() { void NotifyClipControl() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true; flags[OpenGL::Dirty::ClipControl] = true;
} }
void NotifyAlphaTest() { void NotifyAlphaTest() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true; flags[OpenGL::Dirty::AlphaTest] = true;
} }
private: private:
Core::System& system; Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint index_buffer = 0; GLuint index_buffer = 0;
}; };

@ -532,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
return texture_view; return texture_view;
} }
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
VideoCore::RasterizerInterface& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
const Device& device, StateTracker& state_tracker) Tegra::MemoryManager& gpu_memory, const Device& device,
: TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { StateTracker& state_tracker_)
: TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
state_tracker_} {
src_framebuffer.Create(); src_framebuffer.Create();
dst_framebuffer.Create(); dst_framebuffer.Create();
} }

@ -129,8 +129,10 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase { class TextureCacheOpenGL final : public TextureCacheBase {
public: public:
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
const Device& device, StateTracker& state_tracker); Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory, const Device& device,
StateTracker& state_tracker);
~TextureCacheOpenGL(); ~TextureCacheOpenGL();
protected: protected:

@ -275,11 +275,13 @@ public:
} }
}; };
RendererOpenGL::RendererOpenGL(Core::System& system_, Core::Frontend::EmuWindow& emu_window_, RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Tegra::GPU& gpu_, Core::Frontend::EmuWindow& emu_window_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
: RendererBase{emu_window_, std::move(context_)}, system{system_}, std::unique_ptr<Core::Frontend::GraphicsContext> context)
emu_window{emu_window_}, gpu{gpu_}, program_manager{device}, has_debug_tool{HasDebugTool()} {} : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device},
has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default;
@ -386,7 +388,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)}; u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE // TODO(Rodrigo): Read this from HLE
@ -471,7 +473,6 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
auto& telemetry_session = system.TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
telemetry_session.AddField(user_system, "GPU_Model", gpu_model); telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
@ -482,8 +483,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) { if (rasterizer) {
return; return;
} }
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
program_manager, state_tracker); screen_info, program_manager, state_tracker);
} }
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,

@ -16,16 +16,25 @@
namespace Core { namespace Core {
class System; class System;
} class TelemetrySession;
} // namespace Core
namespace Core::Frontend { namespace Core::Frontend {
class EmuWindow; class EmuWindow;
} }
namespace Core::Memory {
class Memory;
}
namespace Layout { namespace Layout {
struct FramebufferLayout; struct FramebufferLayout;
} }
namespace Tegra {
class GPU;
}
namespace OpenGL { namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen /// Structure used for storing information about the textures for the Switch screen
@ -56,7 +65,8 @@ class FrameMailbox;
class RendererOpenGL final : public VideoCore::RendererBase { class RendererOpenGL final : public VideoCore::RendererBase {
public: public:
explicit RendererOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
Tegra::GPU& gpu, Tegra::GPU& gpu,
std::unique_ptr<Core::Frontend::GraphicsContext> context); std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererOpenGL() override; ~RendererOpenGL() override;
@ -94,12 +104,13 @@ private:
bool Present(int timeout_ms); bool Present(int timeout_ms);
Core::System& system; Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu; Tegra::GPU& gpu;
const Device device;
StateTracker state_tracker{system}; const Device device;
StateTracker state_tracker{gpu};
// OpenGL object IDs // OpenGL object IDs
OGLBuffer vertex_buffer; OGLBuffer vertex_buffer;

@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
if (!library.Open(filename.c_str())) { if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so. // Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
library.Open(filename.c_str()); (void)library.Open(filename.c_str());
} }
#endif #endif
return library; return library;
@ -237,10 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
} // Anonymous namespace } // Anonymous namespace
RendererVulkan::RendererVulkan(Core::System& system_, Core::Frontend::EmuWindow& emu_window, RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Tegra::GPU& gpu_, Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context) std::unique_ptr<Core::Frontend::GraphicsContext> context)
: RendererBase{emu_window, std::move(context)}, system{system_}, gpu{gpu_} {} : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_},
cpu_memory{cpu_memory_}, gpu{gpu_} {}
RendererVulkan::~RendererVulkan() { RendererVulkan::~RendererVulkan() {
ShutDown(); ShutDown();
@ -304,15 +306,15 @@ bool RendererVulkan::Init() {
swapchain = std::make_unique<VKSwapchain>(*surface, *device); swapchain = std::make_unique<VKSwapchain>(*surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false); swapchain->Create(framebuffer.width, framebuffer.height, false);
state_tracker = std::make_unique<StateTracker>(system); state_tracker = std::make_unique<StateTracker>(gpu);
scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, rasterizer = std::make_unique<RasterizerVulkan>(
*resource_manager, *memory_manager, render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, *device,
*state_tracker, *scheduler); *resource_manager, *memory_manager, *state_tracker, *scheduler);
blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, blit_screen = std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
*resource_manager, *memory_manager, *swapchain, *resource_manager, *memory_manager, *swapchain,
*scheduler, screen_info); *scheduler, screen_info);
@ -440,8 +442,7 @@ void RendererVulkan::Report() const {
LOG_INFO(Render_Vulkan, "Device: {}", model_name); LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
auto& telemetry_session = system.TelemetrySession(); static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name); telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name); telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);

@ -14,7 +14,15 @@
#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/renderer_vulkan/wrapper.h"
namespace Core { namespace Core {
class System; class TelemetrySession;
}
namespace Core::Memory {
class Memory;
}
namespace Tegra {
class GPU;
} }
namespace Vulkan { namespace Vulkan {
@ -38,7 +46,8 @@ struct VKScreenInfo {
class RendererVulkan final : public VideoCore::RendererBase { class RendererVulkan final : public VideoCore::RendererBase {
public: public:
explicit RendererVulkan(Core::System& system, Core::Frontend::EmuWindow& emu_window, explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
Tegra::GPU& gpu, Tegra::GPU& gpu,
std::unique_ptr<Core::Frontend::GraphicsContext> context); std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererVulkan() override; ~RendererVulkan() override;
@ -59,7 +68,8 @@ private:
void Report() const; void Report() const;
Core::System& system; Core::TelemetrySession& telemetry_session;
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu; Tegra::GPU& gpu;
Common::DynamicLibrary library; Common::DynamicLibrary library;

@ -210,14 +210,16 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here // Unaligned image data goes here
}; };
VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, Core::Frontend::EmuWindow& render_window_,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
VKSwapchain& swapchain, VKScheduler& scheduler, VKResourceManager& resource_manager_, VKMemoryManager& memory_manager_,
const VKScreenInfo& screen_info) VKSwapchain& swapchain_, VKScheduler& scheduler_,
: system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device}, const VKScreenInfo& screen_info_)
resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain}, : cpu_memory{cpu_memory_}, render_window{render_window_},
scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} { rasterizer{rasterizer_}, device{device_}, resource_manager{resource_manager_},
memory_manager{memory_manager_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
watches.resize(image_count); watches.resize(image_count);
std::generate(watches.begin(), watches.end(), std::generate(watches.begin(), watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); }); []() { return std::make_unique<VKFenceWatch>(); });
@ -259,7 +261,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
const auto pixel_format = const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const auto host_ptr = system.Memory().GetPointer(framebuffer_addr); const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
// TODO(Rodrigo): Read this from HLE // TODO(Rodrigo): Read this from HLE

@ -15,6 +15,10 @@ namespace Core {
class System; class System;
} }
namespace Core::Memory {
class Memory;
}
namespace Core::Frontend { namespace Core::Frontend {
class EmuWindow; class EmuWindow;
} }
@ -39,7 +43,8 @@ class VKSwapchain;
class VKBlitScreen final { class VKBlitScreen final {
public: public:
explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKSwapchain& swapchain, VKScheduler& scheduler, VKSwapchain& swapchain, VKScheduler& scheduler,
@ -81,7 +86,7 @@ private:
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const; std::size_t image_index) const;
Core::System& system; Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window; Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device; const VKDevice& device;

@ -145,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
}); });
} }
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKMemoryManager& memory_manager, Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool) const VKDevice& device_, VKMemoryManager& memory_manager_,
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
CreateStreamBuffer(device, : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory,
scheduler)}, CreateStreamBuffer(device_,
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ scheduler_)},
staging_pool} {} device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
staging_pool_} {}
VKBufferCache::~VKBufferCache() = default; VKBufferCache::~VKBufferCache() = default;

@ -13,10 +13,6 @@
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
namespace Vulkan { namespace Vulkan {
class VKDevice; class VKDevice;
@ -53,7 +49,8 @@ private:
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public: public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager, const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool); VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache(); ~VKBufferCache();

@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const {
} }
} }
VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
const VKDevice& device, VKScheduler& scheduler, Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache, VKBufferCache& buffer_cache, VKQueryCache& query_cache,
VKQueryCache& query_cache) const VKDevice& device_, VKScheduler& scheduler_)
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache),
device{device}, scheduler{scheduler} {} device{device_}, scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);

@ -55,10 +55,10 @@ using GenericFenceManager =
class VKFenceManager final : public GenericFenceManager { class VKFenceManager final : public GenericFenceManager {
public: public:
explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
const VKDevice& device, VKScheduler& scheduler, Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache, VKBufferCache& buffer_cache, VKQueryCache& query_cache,
VKQueryCache& query_cache); const VKDevice& device, VKScheduler& scheduler);
protected: protected:
Fence CreateFence(u32 value, bool is_stubbed) override; Fence CreateFence(u32 value, bool is_stubbed) override;

@ -135,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0; return std::memcmp(&rhs, this, sizeof *this) == 0;
} }
Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage,
VideoCommon::Shader::ProgramCode program_code, u32 main_offset) GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_,
: gpu_addr{gpu_addr}, program_code{std::move(program_code)}, u32 main_offset)
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine),
compiler_settings, registry}, shader_ir(program_code, main_offset, compiler_settings, registry),
entries{GenerateShaderEntries(shader_ir)} {} entries(GenerateShaderEntries(shader_ir)) {}
Shader::~Shader() = default; Shader::~Shader() = default;
Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_,
Tegra::Engines::ShaderType stage) { Tegra::Engines::Maxwell3D& maxwell3d_,
if (stage == ShaderType::Compute) { Tegra::Engines::KeplerCompute& kepler_compute_,
return system.GPU().KeplerCompute(); Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
} else { VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
return system.GPU().Maxwell3D(); VKUpdateDescriptorQueue& update_descriptor_queue_,
} VKRenderPassCache& renderpass_cache_)
} : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
const VKDevice& device, VKScheduler& scheduler, update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {}
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache)
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
scheduler{scheduler}, descriptor_pool{descriptor_pool},
update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default; VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)}; const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled // Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue; continue;
} }
auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
const GPUVAddr program_addr{GetShaderAddress(system, program)}; const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
if (!result) { if (!result) {
const auto host_ptr{memory_manager.GetPointer(program_addr)}; const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one // No shader found - create a new one
constexpr u32 stage_offset = STAGE_MAIN_OFFSET; static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
const std::size_t size_in_bytes = code.size() * sizeof(u64); const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
stage_offset); std::move(code), stage_offset);
result = shader.get(); result = shader.get();
if (cpu_addr) { if (cpu_addr) {
@ -215,11 +207,11 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
} }
last_graphics_key = key; last_graphics_key = key;
if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) { if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
std::unique_lock lock{pipeline_cache}; std::unique_lock lock{pipeline_cache};
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) { if (is_cache_miss) {
system.GPU().ShaderNotify().MarkSharderBuilding(); gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state); const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
@ -233,13 +225,13 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second; auto& entry = pair->second;
if (is_cache_miss) { if (is_cache_miss) {
system.GPU().ShaderNotify().MarkSharderBuilding(); gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state); const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key, update_descriptor_queue, renderpass_cache, key,
bindings, program); bindings, program);
system.GPU().ShaderNotify().MarkShaderComplete(); gpu.ShaderNotify().MarkShaderComplete();
} }
last_graphics_pipeline = entry.get(); last_graphics_pipeline = entry.get();
return last_graphics_pipeline; return last_graphics_pipeline;
@ -255,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
} }
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
auto& memory_manager = system.GPU().MemoryManager(); const GPUVAddr gpu_addr = key.shader;
const auto program_addr = key.shader;
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr); ASSERT(cpu_addr);
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) { if (!shader) {
// No shader found - create a new one // No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr); const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
const std::size_t size_in_bytes = code.size() * sizeof(u64); const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
std::move(code), KERNEL_MAIN_OFFSET); *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
shader = shader_info.get(); shader = shader_info.get();
if (cpu_addr) { if (cpu_addr) {
@ -298,7 +289,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
} }
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
system.GPU().ShaderNotify().MarkShaderComplete(); gpu.ShaderNotify().MarkShaderComplete();
std::unique_lock lock{pipeline_cache}; std::unique_lock lock{pipeline_cache};
graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
} }
@ -339,9 +330,6 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization; Specialization specialization;
if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
device.IsExtExtendedDynamicStateSupported()) { device.IsExtExtendedDynamicStateSupported()) {
@ -364,12 +352,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled // Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue; continue;
} }
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5

@ -85,7 +85,8 @@ namespace Vulkan {
class Shader { class Shader {
public: public:
explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine,
Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr,
VideoCommon::Shader::ProgramCode program_code, u32 main_offset); VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
~Shader(); ~Shader();
@ -97,22 +98,19 @@ public:
return shader_ir; return shader_ir;
} }
const VideoCommon::Shader::Registry& GetRegistry() const {
return registry;
}
const VideoCommon::Shader::ShaderIR& GetIR() const { const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir; return shader_ir;
} }
const VideoCommon::Shader::Registry& GetRegistry() const {
return registry;
}
const ShaderEntries& GetEntries() const { const ShaderEntries& GetEntries() const {
return entries; return entries;
} }
private: private:
static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code; VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry; VideoCommon::Shader::Registry registry;
@ -122,9 +120,11 @@ private:
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public: public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
const VKDevice& device, VKScheduler& scheduler, Tegra::Engines::Maxwell3D& maxwell3d,
VKDescriptorPool& descriptor_pool, Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, const VKDevice& device,
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue, VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache); VKRenderPassCache& renderpass_cache);
~VKPipelineCache() override; ~VKPipelineCache() override;
@ -145,7 +145,11 @@ private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const FixedPipelineState& fixed_state); const FixedPipelineState& fixed_state);
Core::System& system; Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
const VKDevice& device; const VKDevice& device;
VKScheduler& scheduler; VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool; VKDescriptorPool& descriptor_pool;

@ -68,10 +68,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
} }
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler) const VKDevice& device, VKScheduler& scheduler)
: VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool>{system, rasterizer}, QueryPool>{rasterizer, maxwell3d, gpu_memory},
device{device}, scheduler{scheduler} { device{device}, scheduler{scheduler} {
for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));

@ -56,7 +56,8 @@ class VKQueryCache final
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool> { QueryPool> {
public: public:
explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler); const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache(); ~VKQueryCache();

@ -381,28 +381,30 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
} }
} }
RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
VKScreenInfo& screen_info, const VKDevice& device, Tegra::MemoryManager& gpu_memory_,
VKResourceManager& resource_manager, Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_,
VKMemoryManager& memory_manager, StateTracker& state_tracker, const VKDevice& device_, VKResourceManager& resource_manager_,
VKScheduler& scheduler) VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
: RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, VKScheduler& scheduler_)
screen_info{screen_info}, device{device}, resource_manager{resource_manager}, : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_),
memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_),
device(device_), resource_manager(resource_manager_), memory_manager(memory_manager_),
state_tracker(state_tracker_), scheduler(scheduler_),
staging_pool(device, memory_manager, scheduler), descriptor_pool(device), staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
update_descriptor_queue(device, scheduler), renderpass_cache(device), update_descriptor_queue(device, scheduler), renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, texture_cache(*this, maxwell3d, gpu_memory, device, resource_manager, memory_manager,
staging_pool), scheduler, staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
renderpass_cache), descriptor_pool, update_descriptor_queue, renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
query_cache(system, *this, device, scheduler), scheduler),
wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { wfi_event(device.GetLogical().CreateNewEvent()), async_shaders(emu_window) {
scheduler.SetQueryCache(query_cache); scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) { if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers(); async_shaders.AllocateWorkers();
@ -414,15 +416,13 @@ RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Drawing); MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork(); FlushWork();
query_cache.UpdateCounters(); query_cache.UpdateCounters();
SCOPE_EXIT({ system.GPU().TickWork(); });
const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key; GraphicsPipelineCacheKey key;
key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@ -480,8 +480,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() { void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing); MICROPROFILE_SCOPE(Vulkan_Clearing);
const auto& gpu = system.GPU().Maxwell3D(); if (!maxwell3d.ShouldExecute()) {
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return; return;
} }
@ -490,7 +489,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters(); query_cache.UpdateCounters();
const auto& regs = gpu.regs; const auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A; regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z; const bool use_depth = regs.clear_buffers.Z;
@ -559,7 +558,7 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
query_cache.UpdateCounters(); query_cache.UpdateCounters();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = kepler_compute.launch_description;
auto& pipeline = pipeline_cache.GetComputePipeline({ auto& pipeline = pipeline_cache.GetComputePipeline({
.shader = code_addr, .shader = code_addr,
.shared_memory_size = launch_desc.shared_alloc, .shared_memory_size = launch_desc.shared_alloc,
@ -655,16 +654,14 @@ void RasterizerVulkan::SyncGuestHost() {
} }
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
gpu.MemoryManager().Write<u32>(addr, value); gpu_memory.Write<u32>(addr, value);
return; return;
} }
fence_manager.SignalSemaphore(addr, value); fence_manager.SignalSemaphore(addr, value);
} }
void RasterizerVulkan::SignalSyncPoint(u32 value) { void RasterizerVulkan::SignalSyncPoint(u32 value) {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value); gpu.IncrementSyncPoint(value);
return; return;
@ -673,7 +670,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
} }
void RasterizerVulkan::ReleaseFences() { void RasterizerVulkan::ReleaseFences() {
auto& gpu{system.GPU()};
if (!gpu.IsAsync()) { if (!gpu.IsAsync()) {
return; return;
} }
@ -751,10 +747,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true; return true;
} }
void RasterizerVulkan::SetupDirtyFlags() {
state_tracker.Initialize();
}
void RasterizerVulkan::FlushWork() { void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096; static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@ -778,10 +770,9 @@ void RasterizerVulkan::FlushWork() {
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets); MICROPROFILE_SCOPE(Vulkan_RenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
auto& dirty = maxwell3d.dirty.flags;
auto& regs = maxwell3d.regs;
const auto& regs = maxwell3d.regs;
auto& dirty = maxwell3d.dirty.flags;
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false; dirty[VideoCommon::Dirty::RenderTargets] = false;
@ -844,7 +835,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
return true; return true;
}; };
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < num_attachments; ++index) { for (std::size_t index = 0; index < num_attachments; ++index) {
if (try_push(color_attachments[index])) { if (try_push(color_attachments[index])) {
@ -880,13 +871,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
bool is_instanced) { bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Geometry); MICROPROFILE_SCOPE(Vulkan_Geometry);
const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = maxwell3d.regs;
const auto& regs = gpu.regs;
SetupVertexArrays(buffer_bindings); SetupVertexArrays(buffer_bindings);
const u32 base_instance = regs.vb_base_instance; const u32 base_instance = regs.vb_base_instance;
const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
@ -947,7 +937,7 @@ void RasterizerVulkan::SetupImageTransitions(
} }
void RasterizerVulkan::UpdateDynamicStates() { void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = system.GPU().Maxwell3D().regs; auto& regs = maxwell3d.regs;
UpdateViewportsState(regs); UpdateViewportsState(regs);
UpdateScissorsState(regs); UpdateScissorsState(regs);
UpdateDepthBias(regs); UpdateDepthBias(regs);
@ -968,7 +958,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
} }
void RasterizerVulkan::BeginTransformFeedback() { void RasterizerVulkan::BeginTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) { if (regs.tfb_enabled == 0) {
return; return;
} }
@ -1000,7 +990,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
} }
void RasterizerVulkan::EndTransformFeedback() { void RasterizerVulkan::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) { if (regs.tfb_enabled == 0) {
return; return;
} }
@ -1013,7 +1003,7 @@ void RasterizerVulkan::EndTransformFeedback() {
} }
void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index]; const auto& vertex_array = regs.vertex_array[index];
@ -1039,7 +1029,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
if (params.num_vertices == 0) { if (params.num_vertices == 0) {
return; return;
} }
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
switch (regs.draw.topology) { switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads: { case Maxwell::PrimitiveTopology::Quads: {
if (!params.is_indexed) { if (!params.is_indexed) {
@ -1087,8 +1077,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers); MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
const auto& gpu = system.GPU().Maxwell3D(); const auto& shader_stage = maxwell3d.state.shader_stages[stage];
const auto& shader_stage = gpu.state.shader_stages[stage];
for (const auto& entry : entries.const_buffers) { for (const auto& entry : entries.const_buffers) {
SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
} }
@ -1096,8 +1085,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
auto& gpu{system.GPU()}; const auto& cbufs{maxwell3d.state.shader_stages[stage]};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
for (const auto& entry : entries.global_buffers) { for (const auto& entry : entries.global_buffers) {
const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
@ -1107,19 +1095,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.uniform_texels) { for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic; const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupUniformTexels(image, entry); SetupUniformTexels(image, entry);
} }
} }
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) { for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) { for (std::size_t i = 0; i < entry.size; ++i) {
const auto texture = GetTextureInfo(gpu, entry, stage, i); const auto texture = GetTextureInfo(maxwell3d, entry, stage, i);
SetupTexture(texture, entry); SetupTexture(texture, entry);
} }
} }
@ -1127,25 +1113,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.storage_texels) { for (const auto& entry : entries.storage_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic; const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupStorageTexel(image, entry); SetupStorageTexel(image, entry);
} }
} }
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) { for (const auto& entry : entries.images) {
const auto tic = GetTextureInfo(gpu, entry, stage).tic; const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupImage(tic, entry); SetupImage(tic, entry);
} }
} }
void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers); MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = kepler_compute.launch_description;
for (const auto& entry : entries.const_buffers) { for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@ -1159,7 +1143,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
for (const auto& entry : entries.global_buffers) { for (const auto& entry : entries.global_buffers) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
SetupGlobalBuffer(entry, addr); SetupGlobalBuffer(entry, addr);
@ -1168,19 +1152,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.uniform_texels) { for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupUniformTexels(image, entry); SetupUniformTexels(image, entry);
} }
} }
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) { for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) { for (std::size_t i = 0; i < entry.size; ++i) {
const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i);
SetupTexture(texture, entry); SetupTexture(texture, entry);
} }
} }
@ -1188,18 +1170,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.storage_texels) { for (const auto& entry : entries.storage_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupStorageTexel(image, entry); SetupStorageTexel(image, entry);
} }
} }
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.images) { for (const auto& entry : entries.images) {
const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupImage(tic, entry); SetupImage(tic, entry);
} }
} }
@ -1223,9 +1203,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
} }
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
auto& memory_manager{system.GPU().MemoryManager()}; const u64 actual_addr = gpu_memory.Read<u64>(address);
const auto actual_addr = memory_manager.Read<u64>(address); const u32 size = gpu_memory.Read<u32>(address + 8);
const auto size = memory_manager.Read<u32>(address + 8);
if (size == 0) { if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
@ -1508,7 +1487,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
} }
std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
std::size_t size = 0; std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@ -1523,9 +1502,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
} }
std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
const auto& regs = system.GPU().Maxwell3D().regs; return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
return static_cast<std::size_t>(regs.index_array.count) * static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
} }
std::size_t RasterizerVulkan::CalculateConstBufferSize( std::size_t RasterizerVulkan::CalculateConstBufferSize(
@ -1540,7 +1518,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
} }
RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
RenderPassParams params; RenderPassParams params;

@ -106,7 +106,8 @@ struct ImageView {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public: public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
VKScreenInfo& screen_info, const VKDevice& device, VKScreenInfo& screen_info, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
StateTracker& state_tracker, VKScheduler& scheduler); StateTracker& state_tracker, VKScheduler& scheduler);
@ -135,7 +136,6 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override; const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override; u32 pixel_stride) override;
void SetupDirtyFlags() override;
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders; return async_shaders;
@ -279,8 +279,11 @@ private:
VkBuffer DefaultBuffer(); VkBuffer DefaultBuffer();
Core::System& system; Tegra::GPU& gpu;
Core::Frontend::EmuWindow& render_window; Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
VKScreenInfo& screen_info; VKScreenInfo& screen_info;
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
@ -300,8 +303,8 @@ private:
VKPipelineCache pipeline_cache; VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache; VKBufferCache buffer_cache;
VKSamplerCache sampler_cache; VKSamplerCache sampler_cache;
VKFenceManager fence_manager;
VKQueryCache query_cache; VKQueryCache query_cache;
VKFenceManager fence_manager;
vk::Buffer default_buffer; vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit; VKMemoryCommit default_buffer_commit;

@ -132,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
} // Anonymous namespace } // Anonymous namespace
StateTracker::StateTracker(Core::System& system) StateTracker::StateTracker(Tegra::GPU& gpu)
: system{system}, invalidation_flags{MakeInvalidationFlags()} {} : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
void StateTracker::Initialize() {
auto& dirty = system.GPU().Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables); SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables); SetupDirtyViewports(tables);
SetupDirtyScissors(tables); SetupDirtyScissors(tables);
@ -155,9 +152,4 @@ void StateTracker::Initialize() {
SetupDirtyStencilTestEnable(tables); SetupDirtyStencilTestEnable(tables);
} }
void StateTracker::InvalidateCommandBufferState() {
system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
} // namespace Vulkan } // namespace Vulkan

@ -45,11 +45,12 @@ class StateTracker {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public: public:
explicit StateTracker(Core::System& system); explicit StateTracker(Tegra::GPU& gpu);
void Initialize(); void InvalidateCommandBufferState() {
flags |= invalidation_flags;
void InvalidateCommandBufferState(); current_topology = INVALID_TOPOLOGY;
}
bool TouchViewports() { bool TouchViewports() {
return Exchange(Dirty::Viewports, false); return Exchange(Dirty::Viewports, false);
@ -121,13 +122,12 @@ private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept { bool Exchange(std::size_t id, bool new_value) const noexcept {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id]; const bool is_dirty = flags[id];
flags[id] = new_value; flags[id] = new_value;
return is_dirty; return is_dirty;
} }
Core::System& system; Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
}; };

@ -57,9 +57,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace } // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
VkBufferUsageFlags usage) VkBufferUsageFlags usage)
: device{device}, scheduler{scheduler} { : device{device_}, scheduler{scheduler_} {
CreateBuffers(usage); CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);

@ -188,13 +188,13 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl
} // Anonymous namespace } // Anonymous namespace
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, CachedSurface::CachedSurface(const VKDevice& device, VKResourceManager& resource_manager,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr,
GPUVAddr gpu_addr, const SurfaceParams& params) const SurfaceParams& params)
: SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
device{device}, resource_manager{resource_manager}, resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { staging_pool{staging_pool} {
if (params.IsBuffer()) { if (params.IsBuffer()) {
buffer = CreateBuffer(device, params, host_memory_size); buffer = CreateBuffer(device, params, host_memory_size);
commit = memory_manager.Commit(buffer, false); commit = memory_manager.Commit(buffer, false);
@ -490,19 +490,21 @@ VkImageView CachedSurfaceView::GetAttachment() {
return *render_target; return *render_target;
} }
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKResourceManager& resource_manager, Tegra::Engines::Maxwell3D& maxwell3d,
VKMemoryManager& memory_manager, VKScheduler& scheduler, Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
VKStagingBufferPool& staging_pool) VKResourceManager& resource_manager_,
: TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, VKStagingBufferPool& staging_pool_)
staging_pool{staging_pool} {} : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
device{device_}, resource_manager{resource_manager_},
memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} {}
VKTextureCache::~VKTextureCache() = default; VKTextureCache::~VKTextureCache() = default;
Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, return std::make_shared<CachedSurface>(device, resource_manager, memory_manager, scheduler,
scheduler, staging_pool, gpu_addr, params); staging_pool, gpu_addr, params);
} }
void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,

@ -15,10 +15,6 @@
#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
namespace Core {
class System;
}
namespace VideoCore { namespace VideoCore {
class RasterizerInterface; class RasterizerInterface;
} }
@ -45,10 +41,10 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView; friend CachedSurfaceView;
public: public:
explicit CachedSurface(Core::System& system, const VKDevice& device, explicit CachedSurface(const VKDevice& device, VKResourceManager& resource_manager,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr,
GPUVAddr gpu_addr, const SurfaceParams& params); const SurfaceParams& params);
~CachedSurface(); ~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override; void UploadTexture(const std::vector<u8>& staging_buffer) override;
@ -101,7 +97,6 @@ private:
VkImageSubresourceRange GetImageSubresourceRange() const; VkImageSubresourceRange GetImageSubresourceRange() const;
Core::System& system;
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
VKMemoryManager& memory_manager; VKMemoryManager& memory_manager;
@ -201,7 +196,8 @@ private:
class VKTextureCache final : public TextureCacheBase { class VKTextureCache final : public TextureCacheBase {
public: public:
explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKResourceManager& resource_manager, const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler, VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKStagingBufferPool& staging_pool); VKStagingBufferPool& staging_pool);

@ -16,11 +16,10 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
GPUVAddr GetShaderAddress(Core::System& system, GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
} }
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {

@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h" #include "video_core/engines/shader_type.h"
namespace Core {
class System;
}
namespace Tegra { namespace Tegra {
class MemoryManager; class MemoryManager;
} }
@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0; constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program /// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction /// Gets if the current instruction offset is a scheduler instruction

@ -163,13 +163,11 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params; return params;
} }
SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = maxwell3d.regs;
const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
return { return {
.is_tiled = regs.zeta.memory_layout.type == .is_tiled = regs.zeta.memory_layout.type ==
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
@ -191,8 +189,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
}; };
} }
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; std::size_t index) {
const auto& config{maxwell3d.regs.rt[index]};
SurfaceParams params; SurfaceParams params;
params.is_tiled = params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;

@ -33,10 +33,11 @@ public:
const VideoCommon::Shader::Image& entry); const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration. /// Creates SurfaceCachedParams for a depth buffer configuration.
static SurfaceParams CreateForDepthBuffer(Core::System& system); static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
/// Creates SurfaceCachedParams from a framebuffer configuration. /// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
std::size_t index);
/// Creates SurfaceCachedParams from a Fermi2D surface configuration. /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface( static SurfaceParams CreateForFermiCopySurface(

@ -135,8 +135,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
@ -160,8 +159,7 @@ public:
if (!gpu_addr) { if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
@ -183,11 +181,11 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) { TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D(); auto& dirty = maxwell3d.dirty;
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
return depth_buffer.view; return depth_buffer.view;
} }
maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs}; const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()}; const auto gpu_addr{regs.zeta.Address()};
@ -195,13 +193,12 @@ public:
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
SetEmptyDepthBuffer(); SetEmptyDepthBuffer();
return {}; return {};
} }
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@ -215,7 +212,6 @@ public:
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view; return render_targets[index].view;
} }
@ -235,15 +231,14 @@ public:
return {}; return {};
} }
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
SetEmptyColorBuffer(index); SetEmptyColorBuffer(index);
return {}; return {};
} }
auto surface_view = auto surface_view =
GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
preserve_contents, true); preserve_contents, true);
if (render_targets[index].target) { if (render_targets[index].target) {
auto& surface = render_targets[index].target; auto& surface = render_targets[index].target;
@ -300,9 +295,8 @@ public:
const GPUVAddr dst_gpu_addr = dst_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const auto& memory_manager = system.GPU().MemoryManager(); const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
ImageBlit(src_surface, dst_surface.second, copy_config); ImageBlit(src_surface, dst_surface.second, copy_config);
@ -358,9 +352,11 @@ public:
} }
protected: protected:
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
bool is_astc_supported) Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
: system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { bool is_astc_supported_)
: is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
gpu_memory{gpu_memory_} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i); SetEmptyColorBuffer(i);
} }
@ -395,7 +391,7 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) { void ManageRenderTargetUnregister(TSurface& surface) {
auto& dirty = system.GPU().Maxwell3D().dirty; auto& dirty = maxwell3d.dirty;
const u32 index = surface->GetRenderTarget(); const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) { if (index == DEPTH_RT) {
dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
@ -408,8 +404,7 @@ protected:
void Register(TSurface surface) { void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr(); const GPUVAddr gpu_addr = surface->GetGpuAddr();
const std::size_t size = surface->GetSizeInBytes(); const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr); gpu_addr);
@ -459,7 +454,6 @@ protected:
return new_surface; return new_surface;
} }
Core::System& system;
const bool is_astc_supported; const bool is_astc_supported;
private: private:
@ -954,8 +948,7 @@ private:
* @param params The parameters on the candidate surface. * @param params The parameters on the candidate surface.
**/ **/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const std::optional<VAddr> cpu_addr = const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) { if (!cpu_addr) {
Deduction result{}; Deduction result{};
@ -1112,7 +1105,7 @@ private:
void LoadSurface(const TSurface& surface) { void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); surface->LoadBuffer(gpu_memory, staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0)); surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick()); surface->MarkAsModified(false, Tick());
} }
@ -1123,7 +1116,7 @@ private:
} }
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0)); surface->DownloadTexture(staging_cache.GetBuffer(0));
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); surface->FlushBuffer(gpu_memory, staging_cache);
surface->MarkAsModified(false, Tick()); surface->MarkAsModified(false, Tick());
} }
@ -1253,6 +1246,8 @@ private:
} }
VideoCore::RasterizerInterface& rasterizer; VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
FormatLookupTable format_lookup_table; FormatLookupTable format_lookup_table;
FormatCompatibility format_compatibility; FormatCompatibility format_compatibility;

@ -21,14 +21,17 @@ namespace {
std::unique_ptr<VideoCore::RendererBase> CreateRenderer( std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
std::unique_ptr<Core::Frontend::GraphicsContext> context) { std::unique_ptr<Core::Frontend::GraphicsContext> context) {
auto& telemetry_session = system.TelemetrySession();
auto& cpu_memory = system.Memory();
switch (Settings::values.renderer_backend.GetValue()) { switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL: case Settings::RendererBackend::OpenGL:
return std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, gpu, return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
std::move(context)); gpu, std::move(context));
#ifdef HAS_VULKAN #ifdef HAS_VULKAN
case Settings::RendererBackend::Vulkan: case Settings::RendererBackend::Vulkan:
return std::make_unique<Vulkan::RendererVulkan>(system, emu_window, gpu, return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
std::move(context)); gpu, std::move(context));
#endif #endif
default: default:
return nullptr; return nullptr;

@ -30,6 +30,7 @@
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/core.h" #include "core/core.h"
#include "core/frontend/framebuffer_layout.h" #include "core/frontend/framebuffer_layout.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h" #include "core/settings.h"
#include "input_common/keyboard.h" #include "input_common/keyboard.h"
#include "input_common/main.h" #include "input_common/main.h"
@ -63,7 +64,8 @@ void EmuThread::run() {
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
system.Renderer().Rasterizer().LoadDiskResources( system.Renderer().Rasterizer().LoadDiskResources(
stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { system.CurrentProcess()->GetTitleID(), stop_run,
[this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
emit LoadProgress(stage, value, total); emit LoadProgress(stage, value, total);
}); });

@ -26,6 +26,7 @@
#include "core/file_sys/registered_cache.h" #include "core/file_sys/registered_cache.h"
#include "core/file_sys/vfs_real.h" #include "core/file_sys/vfs_real.h"
#include "core/gdbstub/gdbstub.h" #include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/filesystem/filesystem.h"
#include "core/loader/loader.h" #include "core/loader/loader.h"
#include "core/settings.h" #include "core/settings.h"
@ -235,7 +236,9 @@ int main(int argc, char** argv) {
// Core is loaded, start the GPU (makes the GPU contexts current to this thread) // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
system.GPU().Start(); system.GPU().Start();
system.Renderer().Rasterizer().LoadDiskResources(); system.Renderer().Rasterizer().LoadDiskResources(
system.CurrentProcess()->GetTitleID(), false,
[](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
std::thread render_thread([&emu_window] { emu_window->Present(); }); std::thread render_thread([&emu_window] { emu_window->Present(); });
system.Run(); system.Run();

@ -255,7 +255,6 @@ int main(int argc, char** argv) {
"SDLHideTester"); "SDLHideTester");
system.GPU().Start(); system.GPU().Start();
system.Renderer().Rasterizer().LoadDiskResources();
system.Run(); system.Run();
while (!finished) { while (!finished) {