Shader_Cache: setup connection of ConstBufferLocker

master
Fernando Sahmkow 2019-09-23 15:40:58 +07:00 committed by FernandoS27
parent 1a58f45d76
commit acd6441134
10 changed files with 82 additions and 43 deletions

@ -1006,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset());
}();
@ -1051,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
}();
SetupImage(bindpoint, tic, entry);

@ -10,6 +10,7 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@ -173,8 +174,9 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
}
/// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device,
ProgramType program_type, ProgramCode program_code,
ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0;
@ -190,14 +192,25 @@ GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_
switch (program_type) {
case ProgramType::VertexA:
case ProgramType::VertexB:
return GLShader::GenerateVertexShader(device, setup);
case ProgramType::Geometry:
return GLShader::GenerateGeometryShader(device, setup);
case ProgramType::Fragment:
return GLShader::GenerateFragmentShader(device, setup);
case ProgramType::Compute:
return GLShader::GenerateComputeShader(device, setup);
case ProgramType::VertexB: {
VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex,
&(system.GPU().Maxwell3D())};
return GLShader::GenerateVertexShader(locker, device, setup);
}
case ProgramType::Geometry: {
VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry,
&(system.GPU().Maxwell3D())};
return GLShader::GenerateGeometryShader(locker, device, setup);
}
case ProgramType::Fragment: {
VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment,
&(system.GPU().Maxwell3D())};
return GLShader::GenerateFragmentShader(locker, device, setup);
}
case ProgramType::Compute: {
    // The compute locker is bound to the KeplerCompute engine; the graphics stage
    // cases bind theirs to Maxwell3D instead.
    VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute,
                                                  &(system.GPU().KeplerCompute())};
    return GLShader::GenerateComputeShader(locker, device, setup);
}
default:
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
@ -307,8 +320,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)};
auto result{
CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
auto result{CreateProgram(params.system, params.device, GetProgramType(program_type),
program_code, program_code_b)};
if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {};
@ -331,7 +344,7 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})};
const auto code_size{CalculateProgramSize(code)};
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
@ -566,7 +579,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(),
raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
@ -612,7 +625,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier =
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);
@ -639,7 +652,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);

@ -45,6 +45,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
struct ShaderParameters {
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
Core::System& system;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;

@ -21,7 +21,8 @@ static constexpr u32 COMPUTE_OFFSET = 0;
static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: VS" + id + "\n\n";
@ -35,14 +36,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
settings);
settings, locker);
ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first;
}
@ -71,7 +73,8 @@ void main() {
return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: GS" + id + "\n\n";
@ -85,7 +88,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;
@ -97,7 +101,8 @@ void main() {
return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: FS" + id + "\n\n";
@ -120,7 +125,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first;
@ -133,13 +139,15 @@ void main() {
return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: CS" + id + "\n\n";
out += GetCommonDeclarations();
const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings,
locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
out += program.first;

@ -16,6 +16,7 @@ class Device;
namespace OpenGL::GLShader {
using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
struct ShaderSetup {
@ -46,15 +47,19 @@ private:
};
/// Generates the GLSL vertex shader program source code for the given VS program
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL geometry shader program source code for the given GS program
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
/// Generates the GLSL compute shader program source code for the given CS program
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
const ShaderSetup& setup);
} // namespace OpenGL::GLShader

@ -57,8 +57,8 @@ struct BlockInfo {
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
const u32 start)
: start{start}, program_code{program_code}, program_size{program_size} {}
const u32 start, ConstBufferLocker& locker)
: start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {}
u32 start{};
std::vector<BlockInfo> block_info{};
@ -72,6 +72,7 @@ struct CFGRebuildState {
const ProgramCode& program_code;
const std::size_t program_size;
ASTManager* manager;
ConstBufferLocker& locker;
};
enum class BlockCollision : u32 { None, Found, Inside };
@ -214,7 +215,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
if (instr.gpr0.Value() == track_register) {
track_register = instr.gpr8.Value();
result.entries = instr.alu.GetSignedImm20_20();
result.entries = instr.alu.GetSignedImm20_20() + 1;
pos--;
found_track = true;
break;
@ -406,8 +407,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
auto tmp = TrackBranchIndirectInfo(state, address, offset);
if (tmp) {
auto result = *tmp;
LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}",
result.buffer, result.offset, result.entries);
// Collect the key stored at every tracked const-buffer slot so the indirect branch
// (BRX) table can be inspected in the log. The locker is a member of the rebuild
// state; this function has no free-standing `locker` in scope.
std::string entries{};
for (u32 i = 0; i < result.entries; i++) {
    // Slots are read at a 4-byte stride starting from the tracked offset.
    const auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
    if (k) {
        entries += std::to_string(*k);
    } else {
        // NOTE(review): ObtainKey's result is dereferenced, so it is presumably
        // optional-like and may be empty - confirm against ConstBufferLocker.
        entries += "<unavailable>";
    }
    entries += '\n';
}
LOG_CRITICAL(HW_GPU,
             "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}",
             result.buffer, result.offset, result.entries, entries);
} else {
LOG_CRITICAL(HW_GPU, "Track Unsuccesful");
}
@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
const CompilerSettings& settings) {
const CompilerSettings& settings,
ConstBufferLocker& locker) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
CFGRebuildState state{program_code, program_size, start_address};
CFGRebuildState state{program_code, program_size, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);

@ -78,6 +78,7 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
const CompilerSettings& settings);
const CompilerSettings& settings,
ConstBufferLocker& locker);
} // namespace VideoCommon::Shader

@ -102,7 +102,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
auto info = ScanFlow(program_code, program_size, main_offset, settings);
auto info = ScanFlow(program_code, program_size, main_offset, settings, locker);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;

@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
CompilerSettings settings)
CompilerSettings settings, ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
program_manager{true, true}, settings{settings} {
program_manager{true, true}, settings{settings}, locker{locker} {
Decode();
}

@ -68,7 +68,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
CompilerSettings settings);
CompilerSettings settings, ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@ -389,6 +389,7 @@ private:
NodeBlock global_code;
ASTManager program_manager;
CompilerSettings settings{};
ConstBufferLocker& locker;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;