From d8d557df86139ea0011fee0665d76e740681f7c0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Oct 2018 20:09:26 -0400 Subject: [PATCH] Improved Shader accuracy on Vertex and Geometry Shaders with FFMA, FMUL and FADD --- .../renderer_opengl/gl_shader_decompiler.cpp | 55 ++++++++++++++++++- .../renderer_opengl/gl_shader_gen.cpp | 9 ++- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index dec291a7d..002ae90a7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1510,8 +1510,25 @@ private: ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, + + shader.AddLine('{'); + ++shader.scope; + + // This avoids optimizations of constant propagation and keeps the code as the original + // Sadly using the precise keyword causes "linking" errors on fragment shaders. + if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { + shader.AddLine("float tmp = " + op_a + " * " + op_b + ';'); + } else { + shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';'); + } + + + regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, instr.alu.saturate_d); + + + --shader.scope; + shader.AddLine('}'); break; } case OpCode::Id::FADD_C: @@ -1519,8 +1536,25 @@ private: case OpCode::Id::FADD_IMM: { op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, + + shader.AddLine('{'); + ++shader.scope; + + // This avoids optimizations of constant propagation and keeps the code as the original + // Sadly using the precise keyword causes "linking" errors on fragment shaders. + if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { + shader.AddLine("float tmp = " + op_a + " + " + op_b + ';'); + } else { + shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';'); + } + regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, instr.alu.saturate_d); + + + --shader.scope; + shader.AddLine('}'); + + break; } case OpCode::Id::MUFU: { @@ -2087,8 +2121,23 @@ private: } } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, + shader.AddLine('{'); + ++shader.scope; + + // This avoids optimizations of constant propagation and keeps the code as the original + // Sadly using the precise keyword causes "linking" errors on fragment shaders. + if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { + shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); + } else { + shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); + } + + regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, instr.alu.saturate_d); + + + --shader.scope; + shader.AddLine('}'); break; } case OpCode::Type::Hfma2: { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index dfb562706..08dd8dc6c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -15,7 +15,8 @@ static constexpr u32 PROGRAM_OFFSET{10}; ProgramResult GenerateVertexShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += R"( @@ -83,7 +84,8 @@ void main() { ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_geometry();\n"; @@ -117,7 +119,8 @@ void main() { ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { std::string out = "#version 430 core\n"; - out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; out += Decompiler::GetCommonDeclarations(); out += "bool exec_fragment();\n";