gl_shader_decompiler: Use variable AOFFI on supported hardware

master
ReinUsesLisp 2019-04-10 18:03:52 +07:00
parent 0032821864
commit f15c59a164
10 changed files with 100 additions and 69 deletions

@ -5,6 +5,7 @@
#include <cstddef>
#include <glad/glad.h>
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_device.h"
namespace OpenGL {
@ -18,10 +19,27 @@ T GetInteger(GLenum pname) {
}
} // Anonymous namespace
Device::Device() = default;
void Device::Initialize() {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
has_variable_aoffi = TestVariableAoffi();
}
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
uniform sampler2D tex;
uniform ivec2 variable_offset;
void main() {
gl_Position = textureOffset(tex, vec2(0), variable_offset);
}
)";
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
GLint link_status{};
glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
glDeleteProgram(shader);
const bool supported{link_status == GL_TRUE};
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
return supported;
}
} // namespace OpenGL

@ -12,14 +12,19 @@ class Device {
public:
Device();
void Initialize();
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
bool has_variable_aoffi{};
};
} // namespace OpenGL

@ -99,7 +99,7 @@ struct FramebufferCacheKey {
};
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
: res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
@ -107,8 +107,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
device.Initialize();
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();

@ -204,6 +204,7 @@ private:
/// but are needed for correct emulation
void CheckExtensions();
const Device device;
OpenGLState state;
RasterizerCacheOpenGL res_cache;
@ -211,11 +212,8 @@ private:
GlobalRegionCacheOpenGL global_cache;
Core::System& system;
ScreenInfo& screen_info;
Device device;
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,

@ -134,8 +134,8 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
}
/// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
ProgramCode program_code_b) {
GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
if (program_type == Maxwell::ShaderProgram::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
@ -149,11 +149,11 @@ GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, Progr
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return GLShader::GenerateVertexShader(setup);
return GLShader::GenerateVertexShader(device, setup);
case Maxwell::ShaderProgram::Geometry:
return GLShader::GenerateGeometryShader(setup);
return GLShader::GenerateGeometryShader(device, setup);
case Maxwell::ShaderProgram::Fragment:
return GLShader::GenerateFragmentShader(setup);
return GLShader::GenerateFragmentShader(device, setup);
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
UNREACHABLE();
@ -212,22 +212,20 @@ std::set<GLenum> GetSupportedFormats() {
return supported_formats;
}
} // namespace
} // Anonymous namespace
CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
precompiled_programs{precompiled_programs} {
const std::size_t code_size = CalculateProgramSize(program_code);
const std::size_t code_size_b =
program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
GLShader::ProgramResult program_result =
CreateProgram(program_type, program_code, program_code_b);
const std::size_t code_size{CalculateProgramSize(program_code)};
const std::size_t code_size_b{program_code_b.empty() ? 0
: CalculateProgramSize(program_code_b)};
GLShader::ProgramResult program_result{
CreateProgram(device, program_type, program_code, program_code_b)};
if (program_result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return;
@ -251,7 +249,6 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
precompiled_programs} {
code = std::move(result.first);
entries = result.second;
shader_length = entries.shader_length;
@ -344,8 +341,9 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
return {unique_identifier, base_bindings, primitive_mode};
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
: RasterizerCache{rasterizer}, disk_cache{system} {}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device)
: RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@ -439,17 +437,18 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
std::unordered_map<u64, UnspecializedShader> unspecialized;
if (callback)
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading)
if (stop_loading) {
return {};
}
const auto& raw{raws[i]};
const u64 unique_identifier = raw.GetUniqueIdentifier();
const u64 calculated_hash =
GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(
Render_OpenGL,
@ -466,8 +465,8 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
result =
CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
@ -477,8 +476,9 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
{raw.GetUniqueIdentifier(),
{std::move(result.first), std::move(result.second), raw.GetProgramType()}});
if (callback)
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
return unspecialized;
}
@ -512,7 +512,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
precompiled_programs, found->second, host_ptr);
} else {
shader = std::make_shared<CachedShader>(
cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
std::move(program_code), std::move(program_code_b), host_ptr);
}
Register(shader);

@ -27,6 +27,7 @@ class System;
namespace OpenGL {
class CachedShader;
class Device;
class RasterizerOpenGL;
struct UnspecializedShader;
@ -38,7 +39,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier,
Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
@ -112,7 +113,8 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device);
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -130,6 +132,8 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
const Device& device;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
ShaderDiskCacheOpenGL disk_cache;

@ -15,6 +15,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
@ -141,8 +142,9 @@ bool IsPrecise(Node node) {
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix)
: ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
void Decompile() {
DeclareVertex();
@ -819,8 +821,12 @@ private:
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
expr += "ftoi(" + Visit(operand) + ')';
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
}
if (index + 1 < aoffi.size()) {
expr += ", ";
@ -1609,6 +1615,7 @@ private:
return name + '_' + std::to_string(index) + '_' + suffix;
}
const Device& device;
const ShaderIR& ir;
const ShaderStage stage;
const std::string suffix;
@ -1636,8 +1643,9 @@ std::string GetCommonDeclarations() {
"}\n";
}
ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) {
GLSLDecompiler decompiler(ir, stage, suffix);
ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}

@ -12,6 +12,10 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
}
namespace VideoCommon::Shader {
class ShaderIR;
}
@ -65,7 +69,7 @@ struct ShaderEntries {
std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix);
ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Maxwell::ShaderStage stage, const std::string& suffix);
} // namespace OpenGL::GLShader

@ -16,7 +16,7 @@ using VideoCommon::Shader::ShaderIR;
static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@ -34,14 +34,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
out += program_b.first;
}
@ -75,7 +76,7 @@ void main() {
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@ -95,7 +96,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
out += R"(
@ -106,7 +107,7 @@ void main() {
return {out, program.second};
}
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@ -158,7 +159,7 @@ bool AlphaFunc(in float value) {
)";
ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
out += program.first;

@ -10,6 +10,10 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
}
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
@ -39,22 +43,13 @@ private:
bool has_program_b{};
};
/**
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code
*/
ProgramResult GenerateVertexShader(const ShaderSetup& setup);
/// Generates the GLSL vertex shader program source code for the given VS program
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
/**
* Generates the GLSL geometry shader program source code for the given GS program
* @returns String of the shader source code
*/
ProgramResult GenerateGeometryShader(const ShaderSetup& setup);
/// Generates the GLSL geometry shader program source code for the given GS program
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
/**
* Generates the GLSL fragment shader program source code for the given FS program
* @returns String of the shader source code
*/
ProgramResult GenerateFragmentShader(const ShaderSetup& setup);
/// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
} // namespace OpenGL::GLShader