GPU: Implement guest driver profile and deduce texture handler sizes.

master
Fernando Sahmkow 2020-01-03 16:16:29 +07:00 committed by FernandoS27
parent a104b985a8
commit c921e496eb
13 changed files with 127 additions and 0 deletions

@ -29,6 +29,8 @@ add_library(video_core STATIC
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
guest_driver.cpp
guest_driver.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp

@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@ -106,6 +107,8 @@ public:
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
};
} // namespace Tegra::Engines

@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
return result;
}
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
return rasterizer.AccessGuestDriverProfile();
}
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,

@ -218,6 +218,8 @@ public:
return regs.tex_cb_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;

@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
return result;
}
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
return rasterizer.AccessGuestDriverProfile();
}
} // namespace Tegra::Engines

@ -1306,6 +1306,8 @@ public:
return regs.tex_cb_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;

@ -0,0 +1,34 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/guest_driver.h"
namespace VideoCore {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
if (texture_handler_size_deduced) {
return;
}
std::size_t size = bound_offsets.size();
if (size < 2) {
return;
}
std::sort(bound_offsets.begin(), bound_offsets.end(),
[](const u32& a, const u32& b) { return a < b; });
u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
for (std::size_t i = 1; i < size; i++) {
if (bound_offsets[i] == bound_offsets[i - 1]) {
continue;
}
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
min_val = std::min(min_val, new_min);
}
if (min_val > 2) {
return;
}
texture_handler_size_deduced = true;
texture_handler_size = sizeof(u32) * min_val;
}
} // namespace VideoCore

@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
namespace VideoCore {
/**
* The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
* information necessary for impossible to avoid HLE methods like shader tracks.
*/
class GuestDriverProfile {
public:
u32 GetTextureHandlerSize() const {
return texture_handler_size;
}
bool TextureHandlerSizeKnown() const {
return texture_handler_size_deduced;
}
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
private:
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
// use 4 bytes instead. Thus, certain drivers may squish the size.
static constexpr u32 default_texture_handler_size = 8;
u32 texture_handler_size{default_texture_handler_size};
bool texture_handler_size_deduced{};
};
} // namespace VideoCore

@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
@ -78,5 +79,12 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
GuestDriverProfile& AccessGuestDriverProfile() {
return guest_driver_profile;
}
private:
GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore

@ -10,6 +10,7 @@
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
@ -71,6 +72,13 @@ public:
return bindless_samplers;
}
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
if (engine) {
return &(engine->AccessGuestDriverProfile());
}
return nullptr;
}
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;

@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
auto* gpu_driver = locker.AccessGuestDriverProfile();
if (gpu_driver) {
if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
u32 count{};
std::vector<u32> bound_offsets;
for (const auto& sampler : used_samplers) {
if (sampler.IsBindless()) {
continue;
}
count++;
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
}
}
} // namespace VideoCommon::Shader

@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
PostDecode();
}
ShaderIR::~ShaderIR() = default;

@ -191,6 +191,7 @@ private:
};
void Decode();
void PostDecode();
NodeBlock DecodeRange(u32 begin, u32 end);
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);