diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 5638bdbba2..65e357dec9 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -192,11 +192,6 @@ private: static_assert(position < 8 * sizeof(T), "Invalid position"); static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); static_assert(bits > 0, "Invalid number of bits"); - static_assert(std::is_pod::value, "Invalid base type"); + static_assert(std::is_trivially_copyable_v, "T must be trivially copyable in a BitField"); }; #pragma pack() - -#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) -static_assert(std::is_trivially_copyable>::value, - "BitField must be trivially copyable"); -#endif diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index eff0c35a15..ed66d893a0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include "common/bit_field.h" @@ -12,14 +13,10 @@ namespace Tegra { namespace Shader { struct Register { - Register() = default; + constexpr Register() = default; constexpr Register(u64 value) : value(value) {} - constexpr u64 GetIndex() const { - return value; - } - constexpr operator u64() const { return value; } @@ -43,13 +40,13 @@ struct Register { } private: - u64 value; + u64 value{}; }; union Attribute { Attribute() = default; - constexpr Attribute(u64 value) : value(value) {} + constexpr explicit Attribute(u64 value) : value(value) {} enum class Index : u64 { Position = 7, @@ -68,7 +65,20 @@ union Attribute { } fmt28; BitField<39, 8, u64> reg; - u64 value; + u64 value{}; +}; + +union Sampler { + Sampler() = default; + + constexpr explicit Sampler(u64 value) : value(value) {} + + enum class Index : u64 { + Sampler_0 = 8, + }; + + BitField<36, 13, Index> index; + u64 value{}; }; union Uniform { @@ -238,7 +248,7 @@ union OpCode { BitField<55, 9, Id> op3; BitField<52, 12, Id> op4; BitField<51, 13, Id> op5; - u64 value; + u64 value{}; }; static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); @@ -280,6 +290,7 @@ enum class SubOp : u64 { Lg2 = 0x3, Rcp = 0x4, Rsq = 0x5, + Min = 0x8, }; union Instruction { @@ -295,15 +306,25 @@ union Instruction { BitField<20, 8, Register> gpr20; BitField<20, 7, SubOp> sub_op; BitField<28, 8, Register> gpr28; - BitField<36, 13, u64> imm36; BitField<39, 8, Register> gpr39; union { + BitField<20, 19, u64> imm20; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; BitField<50, 1, u64> abs_d; + BitField<56, 1, u64> negate_imm; + + float GetImm20() const { + float result{}; + u32 imm{static_cast(imm20)}; + imm <<= 12; + imm |= negate_imm ? 0x80000000 : 0; + std::memcpy(&result, &imm, sizeof(imm)); + return result; + } } alu; union { @@ -311,11 +332,13 @@ union Instruction { BitField<49, 1, u64> negate_c; } ffma; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; Attribute attribute; Uniform uniform; + Sampler sampler; u64 hex; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e11711533e..6233ee3585 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; using Tegra::Shader::Uniform; @@ -155,23 +156,27 @@ private: /// Generates code representing an input attribute register. std::string GetInputAttribute(Attribute::Index attribute) { - declr_input_attribute.insert(attribute); + switch (attribute) { + case Attribute::Index::Position: + return "position"; + default: + const u32 index{static_cast(attribute) - + static_cast(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_input_attribute.insert(attribute); + return "input_attribute_" + std::to_string(index); + } - const u32 index{static_cast(attribute) - - static_cast(Attribute::Index::Attribute_0)}; - if (attribute >= Attribute::Index::Attribute_0) { - return "input_attribute_" + std::to_string(index); + NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + UNREACHABLE(); } - - LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); - UNREACHABLE(); } /// Generates code representing an output attribute register. std::string GetOutputAttribute(Attribute::Index attribute) { switch (attribute) { case Attribute::Index::Position: - return "gl_Position"; + return "position"; default: const u32 index{static_cast(attribute) - static_cast(Attribute::Index::Attribute_0)}; @@ -180,22 +185,42 @@ private: return "output_attribute_" + std::to_string(index); } - LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); + NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); UNREACHABLE(); } } + /// Generates code representing an immediate value + static std::string GetImmediate(const Instruction& instr) { + return std::to_string(instr.alu.GetImm20()); + } + /// Generates code representing a temporary (GPR) register. - std::string GetRegister(const Register& reg) { - return *declr_register.insert("register_" + std::to_string(reg)).first; + std::string GetRegister(const Register& reg, unsigned elem = 0) { + if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) { + // GPRs 0-3 are output color for the fragment shader + return std::string{"color."} + "rgba"[(reg + elem) & 3]; + } + + return *declr_register.insert("register_" + std::to_string(reg + elem)).first; } /// Generates code representing a uniform (C buffer) register. std::string GetUniform(const Uniform& reg) { - declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage); + declr_const_buffers[reg.index].MarkAsUsed(static_cast(reg.index), + static_cast(reg.offset), stage); return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']'; } + /// Generates code representing a texture sampler. + std::string GetSampler(const Sampler& sampler) const { + // TODO(Subv): Support more than just texture sampler 0 + ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported"); + const unsigned index{static_cast(sampler.index.Value()) - + static_cast(Sampler::Index::Sampler_0)}; + return "tex[" + std::to_string(index) + "]"; + } + /** * Adds code that calls a subroutine. * @param subroutine the subroutine to call. @@ -217,12 +242,13 @@ private: * @param value the code representing the value to assign. */ void SetDest(u64 elem, const std::string& reg, const std::string& value, - u64 dest_num_components, u64 value_num_components) { + u64 dest_num_components, u64 value_num_components, bool is_abs = false) { std::string swizzle = "."; swizzle += "xyzw"[elem]; std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + src = is_abs ? "abs(" + src + ")" : src; shader.AddLine(dest + " = " + src + ";"); } @@ -240,8 +266,6 @@ private: switch (OpCode::GetInfo(instr.opcode).type) { case OpCode::Type::Arithmetic: { - ASSERT(!instr.alu.abs_d); - std::string dest = GetRegister(instr.gpr0); std::string op_a = instr.alu.negate_a ? "-" : ""; op_a += GetRegister(instr.gpr8); @@ -250,63 +274,109 @@ private: } std::string op_b = instr.alu.negate_b ? "-" : ""; - if (instr.is_b_gpr) { - op_b += GetRegister(instr.gpr20); + + if (instr.is_b_imm) { + op_b += GetImmediate(instr); } else { - op_b += GetUniform(instr.uniform); + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } } + if (instr.alu.abs_b) { op_b = "abs(" + op_b + ")"; } switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: { - SetDest(0, dest, op_a + " * " + op_b, 1, 1); + case OpCode::Id::FMUL_R: + case OpCode::Id::FMUL_IMM: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); break; } case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: { - SetDest(0, dest, op_a + " + " + op_b, 1, 1); + case OpCode::Id::FADD_R: + case OpCode::Id::FADD_IMM: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); + break; + } + case OpCode::Id::MUFU: { + switch (instr.sub_op) { + case SubOp::Cos: + SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Sin: + SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Ex2: + SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Lg2: + SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Rcp: + SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); + break; + case SubOp::Rsq: + SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d); + break; + case SubOp::Min: + SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d); + break; + default: + NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}", + static_cast(instr.sub_op.Value())); + UNREACHABLE(); + } break; } default: { - LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - static_cast(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); - break; + NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); } } break; } case OpCode::Type::Ffma: { - ASSERT_MSG(!instr.ffma.negate_b, "untested"); - ASSERT_MSG(!instr.ffma.negate_c, "untested"); - std::string dest = GetRegister(instr.gpr0); std::string op_a = GetRegister(instr.gpr8); - std::string op_b = instr.ffma.negate_b ? "-" : ""; - op_b += GetUniform(instr.uniform); - std::string op_c = instr.ffma.negate_c ? "-" : ""; - op_c += GetRegister(instr.gpr39); switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::FFMA_CR: { - SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + op_b += GetUniform(instr.uniform); + op_c += GetRegister(instr.gpr39); break; } + case OpCode::Id::FFMA_RR: { + op_b += GetRegister(instr.gpr20); + op_c += GetRegister(instr.gpr39); + break; + } + case OpCode::Id::FFMA_RC: { + op_b += GetRegister(instr.gpr39); + op_c += GetUniform(instr.uniform); + break; + } + case OpCode::Id::FFMA_IMM: { + op_b += GetImmediate(instr); + op_c += GetRegister(instr.gpr39); + break; + } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } + } - default: { - LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", - static_cast(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); - break; - } - } + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); break; } case OpCode::Type::Memory: { @@ -315,22 +385,33 @@ private: switch (instr.opcode.EffectiveOpCode()) { case OpCode::Id::LD_A: { - ASSERT(instr.attribute.fmt20.size == 0); + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); break; } case OpCode::Id::ST_A: { - ASSERT(instr.attribute.fmt20.size == 0); + ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested"); SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); break; } - default: { - LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", - static_cast(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = GetRegister(instr.gpr8); + const std::string op_b = GetRegister(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2(" + op_a + ", " + op_b + ")"; + const std::string texture = "texture(" + sampler + ", " + coord + ")"; + for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4); + } break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } break; } @@ -342,14 +423,18 @@ private: offset = PROGRAM_END - 1; break; } - - default: { - LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - static_cast(instr.opcode.EffectiveOpCode()), - OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); - throw DecompileFail("Unhandled instruction"); + case OpCode::Id::IPA: { + const auto& attribute = instr.attribute.fmt28; + std::string dest = GetRegister(instr.gpr0); + SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4); break; } + default: { + NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}", + static_cast(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name, instr.hex); + UNREACHABLE(); + } } break; @@ -514,7 +599,7 @@ boost::optional DecompileProgram(const ProgramCode& program_code, GLSLGenerator generator(subroutines, program_code, main_offset, stage); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { - LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); + NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); } return boost::none; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index aeea1c8051..8b7f17601f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -27,10 +27,13 @@ out gl_PerVertex { vec4 gl_Position; }; +out vec4 position; + void main() { exec_shader(); -} + gl_Position = position; +} )"; out += program.first; return {out, program.second}; @@ -46,6 +49,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo .get_value_or({}); out += R"( +in vec4 position; out vec4 color; uniform sampler2D tex[32];